|
hipblas
v4.0.5.0
prototype interfaces compatible with ROCm platform and HiP
|
#include "hipblas-export.h"#include "hipblas-version.h"#include <hip/hip_runtime_api.h>#include <stdint.h>Go to the source code of this file.
Classes | |
| struct | hipblasBfloat16 |
| Struct to represent a 16 bit Brain floating-point number. More... | |
| struct | hipblasComplex |
| Struct to represent a complex number with single precision real and imaginary parts. More... | |
| struct | hipblasDoubleComplex |
| Struct to represent a complex number with double precision real and imaginary parts. More... | |
Macros | |
| #define | HIPBLAS_CLANG_STATIC |
Typedefs | |
| typedef void * | hipblasHandle_t |
| hipblasHanlde_t is a void pointer, to store the library context (either rocBLAS or cuBLAS) More... | |
| typedef uint16_t | hipblasHalf |
| To specify the datatype to be unsigned short. More... | |
| typedef int8_t | hipblasInt8 |
| To specify the datatype to be signed char. More... | |
| typedef int64_t | hipblasStride |
| Stride between matrices or vectors in strided_batched functions. More... | |
Enumerations | |
| enum | hipblasStatus_t { HIPBLAS_STATUS_SUCCESS = 0, HIPBLAS_STATUS_NOT_INITIALIZED = 1, HIPBLAS_STATUS_ALLOC_FAILED = 2, HIPBLAS_STATUS_INVALID_VALUE = 3, HIPBLAS_STATUS_MAPPING_ERROR = 4, HIPBLAS_STATUS_EXECUTION_FAILED = 5, HIPBLAS_STATUS_INTERNAL_ERROR = 6, HIPBLAS_STATUS_NOT_SUPPORTED = 7, HIPBLAS_STATUS_ARCH_MISMATCH = 8, HIPBLAS_STATUS_HANDLE_IS_NULLPTR = 9, HIPBLAS_STATUS_INVALID_ENUM = 10, HIPBLAS_STATUS_UNKNOWN = 11 } |
| hipblas status codes definition More... | |
| enum | hipblasOperation_t { HIPBLAS_OP_N = 111, HIPBLAS_OP_T = 112, HIPBLAS_OP_C = 113 } |
| Used to specify whether the matrix is to be transposed or not. More... | |
| enum | hipblasPointerMode_t { HIPBLAS_POINTER_MODE_HOST, HIPBLAS_POINTER_MODE_DEVICE } |
| Indicates if scalar pointers are on host or device. This is used for scalars alpha and beta and for scalar function return values. More... | |
| enum | hipblasFillMode_t { HIPBLAS_FILL_MODE_UPPER = 121, HIPBLAS_FILL_MODE_LOWER = 122, HIPBLAS_FILL_MODE_FULL = 123 } |
| Used by the Hermitian, symmetric and triangular matrix routines to specify whether the upper or lower triangle is being referenced. More... | |
| enum | hipblasDiagType_t { HIPBLAS_DIAG_NON_UNIT = 131, HIPBLAS_DIAG_UNIT = 132 } |
| It is used by the triangular matrix routines to specify whether the matrix is unit triangular. More... | |
| enum | hipblasSideMode_t { HIPBLAS_SIDE_LEFT = 141, HIPBLAS_SIDE_RIGHT = 142, HIPBLAS_SIDE_BOTH = 143 } |
| Indicates the side matrix A is located relative to matrix B during multiplication. More... | |
| enum | hipblasDatatype_t { HIPBLAS_R_16F = 150, HIPBLAS_R_32F = 151, HIPBLAS_R_64F = 152, HIPBLAS_C_16F = 153, HIPBLAS_C_32F = 154, HIPBLAS_C_64F = 155, HIPBLAS_R_8I = 160, HIPBLAS_R_8U = 161, HIPBLAS_R_32I = 162, HIPBLAS_R_32U = 163, HIPBLAS_C_8I = 164, HIPBLAS_C_8U = 165, HIPBLAS_C_32I = 166, HIPBLAS_C_32U = 167, HIPBLAS_R_16B = 168, HIPBLAS_C_16B = 169, HIPBLAS_DATATYPE_INVALID = 255 } |
| Indicates the precision width of data stored in a blas type. More... | |
| enum | hipblasGemmAlgo_t { HIPBLAS_GEMM_DEFAULT = 160 } |
| Indicates if layer is active with bitmask. More... | |
| enum | hipblasAtomicsMode_t { HIPBLAS_ATOMICS_NOT_ALLOWED = 0, HIPBLAS_ATOMICS_ALLOWED = 1 } |
| Indicates if atomics operations are allowed. Not allowing atomic operations may generally improve determinism and repeatability of results at a cost of performance. More... | |
Functions | |
| hipblasStatus_t | hipblasCreate (hipblasHandle_t *handle) |
| Create hipblas handle. More... | |
| hipblasStatus_t | hipblasDestroy (hipblasHandle_t handle) |
| Destroys the library context created using hipblasCreate() More... | |
| hipblasStatus_t | hipblasSetStream (hipblasHandle_t handle, hipStream_t streamId) |
| Set stream for handle. More... | |
| hipblasStatus_t | hipblasGetStream (hipblasHandle_t handle, hipStream_t *streamId) |
| Get stream[0] for handle. More... | |
| hipblasStatus_t | hipblasSetPointerMode (hipblasHandle_t handle, hipblasPointerMode_t mode) |
| Set hipblas pointer mode. More... | |
| hipblasStatus_t | hipblasGetPointerMode (hipblasHandle_t handle, hipblasPointerMode_t *mode) |
| Get hipblas pointer mode. More... | |
| hipblasStatus_t | hipblasSetVector (int n, int elemSize, const void *x, int incx, void *y, int incy) |
| copy vector from host to device More... | |
| hipblasStatus_t | hipblasGetVector (int n, int elemSize, const void *x, int incx, void *y, int incy) |
| copy vector from device to host More... | |
| hipblasStatus_t | hipblasSetMatrix (int rows, int cols, int elemSize, const void *AP, int lda, void *BP, int ldb) |
| copy matrix from host to device More... | |
| hipblasStatus_t | hipblasGetMatrix (int rows, int cols, int elemSize, const void *AP, int lda, void *BP, int ldb) |
| copy matrix from device to host More... | |
| hipblasStatus_t | hipblasSetVectorAsync (int n, int elemSize, const void *x, int incx, void *y, int incy, hipStream_t stream) |
| asynchronously copy vector from host to device More... | |
| hipblasStatus_t | hipblasGetVectorAsync (int n, int elemSize, const void *x, int incx, void *y, int incy, hipStream_t stream) |
| asynchronously copy vector from device to host More... | |
| hipblasStatus_t | hipblasSetMatrixAsync (int rows, int cols, int elemSize, const void *AP, int lda, void *BP, int ldb, hipStream_t stream) |
| asynchronously copy matrix from host to device More... | |
| hipblasStatus_t | hipblasGetMatrixAsync (int rows, int cols, int elemSize, const void *AP, int lda, void *BP, int ldb, hipStream_t stream) |
| asynchronously copy matrix from device to host More... | |
| hipblasStatus_t | hipblasSetAtomicsMode (hipblasHandle_t handle, hipblasAtomicsMode_t atomics_mode) |
| Set hipblasSetAtomicsMode. More... | |
| hipblasStatus_t | hipblasGetAtomicsMode (hipblasHandle_t handle, hipblasAtomicsMode_t *atomics_mode) |
| Get hipblasSetAtomicsMode. More... | |
| hipblasStatus_t | hipblasStrmmOutofplace (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float *alpha, const float *AP, int lda, const float *BP, int ldb, float *CP, int ldc) |
| hipblasStatus_t | hipblasDtrmmOutofplace (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double *alpha, const double *AP, int lda, const double *BP, int ldb, double *CP, int ldc) |
| hipblasStatus_t | hipblasCtrmmOutofplace (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex *alpha, const hipblasComplex *AP, int lda, const hipblasComplex *BP, int ldb, hipblasComplex *CP, int ldc) |
| hipblasStatus_t | hipblasZtrmmOutofplace (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *AP, int lda, const hipblasDoubleComplex *BP, int ldb, hipblasDoubleComplex *CP, int ldc) |
| hipblasStatus_t | hipblasStrmmBatchedOutofplace (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float *alpha, const float *const AP[], int lda, const float *const BP[], int ldb, float *const CP[], int ldc, int batchCount) |
| hipblasStatus_t | hipblasDtrmmBatchedOutofplace (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double *alpha, const double *const AP[], int lda, const double *const BP[], int ldb, double *const CP[], int ldc, int batchCount) |
| hipblasStatus_t | hipblasCtrmmBatchedOutofplace (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex *alpha, const hipblasComplex *const AP[], int lda, const hipblasComplex *const BP[], int ldb, hipblasComplex *const CP[], int ldc, int batchCount) |
| hipblasStatus_t | hipblasZtrmmBatchedOutofplace (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *const AP[], int lda, const hipblasDoubleComplex *const BP[], int ldb, hipblasDoubleComplex *const CP[], int ldc, int batchCount) |
| hipblasStatus_t | hipblasStrmmStridedBatchedOutofplace (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float *alpha, const float *AP, int lda, hipblasStride strideA, const float *BP, int ldb, hipblasStride strideB, float *CP, int ldc, hipblasStride strideC, int batchCount) |
| hipblasStatus_t | hipblasDtrmmStridedBatchedOutofplace (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double *alpha, const double *AP, int lda, hipblasStride strideA, const double *BP, int ldb, hipblasStride strideB, double *CP, int ldc, hipblasStride strideC, int batchCount) |
| hipblasStatus_t | hipblasCtrmmStridedBatchedOutofplace (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex *alpha, const hipblasComplex *AP, int lda, hipblasStride strideA, const hipblasComplex *BP, int ldb, hipblasStride strideB, hipblasComplex *CP, int ldc, hipblasStride strideC, int batchCount) |
| hipblasStatus_t | hipblasZtrmmStridedBatchedOutofplace (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *AP, int lda, hipblasStride strideA, const hipblasDoubleComplex *BP, int ldb, hipblasStride strideB, hipblasDoubleComplex *BC, int ldc, hipblasStride strideC, int batchCount) |
| hipblasStatus_t | hipblasGemmEx (hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void *alpha, const void *A, hipblasDatatype_t aType, int lda, const void *B, hipblasDatatype_t bType, int ldb, const void *beta, void *C, hipblasDatatype_t cType, int ldc, hipblasDatatype_t computeType, hipblasGemmAlgo_t algo) |
| BLAS EX API. More... | |
| hipblasStatus_t | hipblasGemmBatchedEx (hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void *alpha, const void *A[], hipblasDatatype_t aType, int lda, const void *B[], hipblasDatatype_t bType, int ldb, const void *beta, void *C[], hipblasDatatype_t cType, int ldc, int batchCount, hipblasDatatype_t computeType, hipblasGemmAlgo_t algo) |
| BLAS EX API. More... | |
| hipblasStatus_t | hipblasGemmStridedBatchedEx (hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const void *alpha, const void *A, hipblasDatatype_t aType, int lda, hipblasStride strideA, const void *B, hipblasDatatype_t bType, int ldb, hipblasStride strideB, const void *beta, void *C, hipblasDatatype_t cType, int ldc, hipblasStride strideC, int batchCount, hipblasDatatype_t computeType, hipblasGemmAlgo_t algo) |
| BLAS EX API. More... | |
| hipblasStatus_t | hipblasTrsmEx (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const void *alpha, void *A, int lda, void *B, int ldb, const void *invA, int invAsize, hipblasDatatype_t computeType) |
| hipblasStatus_t | hipblasTrsmBatchedEx (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const void *alpha, void *A, int lda, void *B, int ldb, int batchCount, const void *invA, int invAsize, hipblasDatatype_t computeType) |
| hipblasStatus_t | hipblasTrsmStridedBatchedEx (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const void *alpha, void *A, int lda, hipblasStride strideA, void *B, int ldb, hipblasStride strideB, int batchCount, const void *invA, int invAsize, hipblasStride strideInvA, hipblasDatatype_t computeType) |
| hipblasStatus_t | hipblasAxpyEx (hipblasHandle_t handle, int n, const void *alpha, hipblasDatatype_t alphaType, const void *x, hipblasDatatype_t xType, int incx, void *y, hipblasDatatype_t yType, int incy, hipblasDatatype_t executionType) |
| BLAS EX API. More... | |
| hipblasStatus_t | hipblasAxpyBatchedEx (hipblasHandle_t handle, int n, const void *alpha, hipblasDatatype_t alphaType, const void *x, hipblasDatatype_t xType, int incx, void *y, hipblasDatatype_t yType, int incy, int batchCount, hipblasDatatype_t executionType) |
| BLAS EX API. More... | |
| hipblasStatus_t | hipblasAxpyStridedBatchedEx (hipblasHandle_t handle, int n, const void *alpha, hipblasDatatype_t alphaType, const void *x, hipblasDatatype_t xType, int incx, hipblasStride stridex, void *y, hipblasDatatype_t yType, int incy, hipblasStride stridey, int batchCount, hipblasDatatype_t executionType) |
| BLAS EX API. More... | |
| hipblasStatus_t | hipblasNrm2Ex (hipblasHandle_t handle, int n, const void *x, hipblasDatatype_t xType, int incx, void *result, hipblasDatatype_t resultType, hipblasDatatype_t executionType) |
| BLAS_EX API. More... | |
| hipblasStatus_t | hipblasNrm2BatchedEx (hipblasHandle_t handle, int n, const void *x, hipblasDatatype_t xType, int incx, int batchCount, void *result, hipblasDatatype_t resultType, hipblasDatatype_t executionType) |
| BLAS_EX API. More... | |
| hipblasStatus_t | hipblasNrm2StridedBatchedEx (hipblasHandle_t handle, int n, const void *x, hipblasDatatype_t xType, int incx, hipblasStride stridex, int batchCount, void *result, hipblasDatatype_t resultType, hipblasDatatype_t executionType) |
| BLAS_EX API. More... | |
| hipblasStatus_t | hipblasRotEx (hipblasHandle_t handle, int n, void *x, hipblasDatatype_t xType, int incx, void *y, hipblasDatatype_t yType, int incy, const void *c, const void *s, hipblasDatatype_t csType, hipblasDatatype_t executionType) |
| BLAS EX API. More... | |
| hipblasStatus_t | hipblasRotBatchedEx (hipblasHandle_t handle, int n, void *x, hipblasDatatype_t xType, int incx, void *y, hipblasDatatype_t yType, int incy, const void *c, const void *s, hipblasDatatype_t csType, int batchCount, hipblasDatatype_t executionType) |
| BLAS EX API. More... | |
| hipblasStatus_t | hipblasRotStridedBatchedEx (hipblasHandle_t handle, int n, void *x, hipblasDatatype_t xType, int incx, hipblasStride stridex, void *y, hipblasDatatype_t yType, int incy, hipblasStride stridey, const void *c, const void *s, hipblasDatatype_t csType, int batchCount, hipblasDatatype_t executionType) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasScalEx (hipblasHandle_t handle, int n, const void *alpha, hipblasDatatype_t alphaType, void *x, hipblasDatatype_t xType, int incx, hipblasDatatype_t executionType) |
| BLAS EX API. More... | |
| hipblasStatus_t | hipblasScalBatchedEx (hipblasHandle_t handle, int n, const void *alpha, hipblasDatatype_t alphaType, void *x, hipblasDatatype_t xType, int incx, int batchCount, hipblasDatatype_t executionType) |
| BLAS EX API. More... | |
| hipblasStatus_t | hipblasScalStridedBatchedEx (hipblasHandle_t handle, int n, const void *alpha, hipblasDatatype_t alphaType, void *x, hipblasDatatype_t xType, int incx, hipblasStride stridex, int batchCount, hipblasDatatype_t executionType) |
| BLAS EX API. More... | |
| const char * | hipblasStatusToString (hipblasStatus_t status) |
| hipblasStatus_t | hipblasIsamax (hipblasHandle_t handle, int n, const float *x, int incx, int *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasIdamax (hipblasHandle_t handle, int n, const double *x, int incx, int *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasIcamax (hipblasHandle_t handle, int n, const hipblasComplex *x, int incx, int *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasIzamax (hipblasHandle_t handle, int n, const hipblasDoubleComplex *x, int incx, int *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasIsamaxBatched (hipblasHandle_t handle, int n, const float *const x[], int incx, int batchCount, int *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasIdamaxBatched (hipblasHandle_t handle, int n, const double *const x[], int incx, int batchCount, int *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasIcamaxBatched (hipblasHandle_t handle, int n, const hipblasComplex *const x[], int incx, int batchCount, int *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasIzamaxBatched (hipblasHandle_t handle, int n, const hipblasDoubleComplex *const x[], int incx, int batchCount, int *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasIsamaxStridedBatched (hipblasHandle_t handle, int n, const float *x, int incx, hipblasStride stridex, int batchCount, int *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasIdamaxStridedBatched (hipblasHandle_t handle, int n, const double *x, int incx, hipblasStride stridex, int batchCount, int *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasIcamaxStridedBatched (hipblasHandle_t handle, int n, const hipblasComplex *x, int incx, hipblasStride stridex, int batchCount, int *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasIzamaxStridedBatched (hipblasHandle_t handle, int n, const hipblasDoubleComplex *x, int incx, hipblasStride stridex, int batchCount, int *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasIsamin (hipblasHandle_t handle, int n, const float *x, int incx, int *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasIdamin (hipblasHandle_t handle, int n, const double *x, int incx, int *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasIcamin (hipblasHandle_t handle, int n, const hipblasComplex *x, int incx, int *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasIzamin (hipblasHandle_t handle, int n, const hipblasDoubleComplex *x, int incx, int *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasIsaminBatched (hipblasHandle_t handle, int n, const float *const x[], int incx, int batchCount, int *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasIdaminBatched (hipblasHandle_t handle, int n, const double *const x[], int incx, int batchCount, int *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasIcaminBatched (hipblasHandle_t handle, int n, const hipblasComplex *const x[], int incx, int batchCount, int *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasIzaminBatched (hipblasHandle_t handle, int n, const hipblasDoubleComplex *const x[], int incx, int batchCount, int *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasIsaminStridedBatched (hipblasHandle_t handle, int n, const float *x, int incx, hipblasStride stridex, int batchCount, int *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasIdaminStridedBatched (hipblasHandle_t handle, int n, const double *x, int incx, hipblasStride stridex, int batchCount, int *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasIcaminStridedBatched (hipblasHandle_t handle, int n, const hipblasComplex *x, int incx, hipblasStride stridex, int batchCount, int *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasIzaminStridedBatched (hipblasHandle_t handle, int n, const hipblasDoubleComplex *x, int incx, hipblasStride stridex, int batchCount, int *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasSasum (hipblasHandle_t handle, int n, const float *x, int incx, float *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDasum (hipblasHandle_t handle, int n, const double *x, int incx, double *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasScasum (hipblasHandle_t handle, int n, const hipblasComplex *x, int incx, float *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDzasum (hipblasHandle_t handle, int n, const hipblasDoubleComplex *x, int incx, double *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasSasumBatched (hipblasHandle_t handle, int n, const float *const x[], int incx, int batchCount, float *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDasumBatched (hipblasHandle_t handle, int n, const double *const x[], int incx, int batchCount, double *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasScasumBatched (hipblasHandle_t handle, int n, const hipblasComplex *const x[], int incx, int batchCount, float *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDzasumBatched (hipblasHandle_t handle, int n, const hipblasDoubleComplex *const x[], int incx, int batchCount, double *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasSasumStridedBatched (hipblasHandle_t handle, int n, const float *x, int incx, hipblasStride stridex, int batchCount, float *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDasumStridedBatched (hipblasHandle_t handle, int n, const double *x, int incx, hipblasStride stridex, int batchCount, double *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasScasumStridedBatched (hipblasHandle_t handle, int n, const hipblasComplex *x, int incx, hipblasStride stridex, int batchCount, float *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDzasumStridedBatched (hipblasHandle_t handle, int n, const hipblasDoubleComplex *x, int incx, hipblasStride stridex, int batchCount, double *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasHaxpy (hipblasHandle_t handle, int n, const hipblasHalf *alpha, const hipblasHalf *x, int incx, hipblasHalf *y, int incy) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasSaxpy (hipblasHandle_t handle, int n, const float *alpha, const float *x, int incx, float *y, int incy) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDaxpy (hipblasHandle_t handle, int n, const double *alpha, const double *x, int incx, double *y, int incy) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasCaxpy (hipblasHandle_t handle, int n, const hipblasComplex *alpha, const hipblasComplex *x, int incx, hipblasComplex *y, int incy) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasZaxpy (hipblasHandle_t handle, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *x, int incx, hipblasDoubleComplex *y, int incy) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasHaxpyBatched (hipblasHandle_t handle, int n, const hipblasHalf *alpha, const hipblasHalf *const x[], int incx, hipblasHalf *const y[], int incy, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasSaxpyBatched (hipblasHandle_t handle, int n, const float *alpha, const float *const x[], int incx, float *const y[], int incy, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDaxpyBatched (hipblasHandle_t handle, int n, const double *alpha, const double *const x[], int incx, double *const y[], int incy, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasCaxpyBatched (hipblasHandle_t handle, int n, const hipblasComplex *alpha, const hipblasComplex *const x[], int incx, hipblasComplex *const y[], int incy, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasZaxpyBatched (hipblasHandle_t handle, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *const x[], int incx, hipblasDoubleComplex *const y[], int incy, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasHaxpyStridedBatched (hipblasHandle_t handle, int n, const hipblasHalf *alpha, const hipblasHalf *x, int incx, hipblasStride stridex, hipblasHalf *y, int incy, hipblasStride stridey, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasSaxpyStridedBatched (hipblasHandle_t handle, int n, const float *alpha, const float *x, int incx, hipblasStride stridex, float *y, int incy, hipblasStride stridey, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDaxpyStridedBatched (hipblasHandle_t handle, int n, const double *alpha, const double *x, int incx, hipblasStride stridex, double *y, int incy, hipblasStride stridey, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasCaxpyStridedBatched (hipblasHandle_t handle, int n, const hipblasComplex *alpha, const hipblasComplex *x, int incx, hipblasStride stridex, hipblasComplex *y, int incy, hipblasStride stridey, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasZaxpyStridedBatched (hipblasHandle_t handle, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *x, int incx, hipblasStride stridex, hipblasDoubleComplex *y, int incy, hipblasStride stridey, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasScopy (hipblasHandle_t handle, int n, const float *x, int incx, float *y, int incy) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDcopy (hipblasHandle_t handle, int n, const double *x, int incx, double *y, int incy) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasCcopy (hipblasHandle_t handle, int n, const hipblasComplex *x, int incx, hipblasComplex *y, int incy) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasZcopy (hipblasHandle_t handle, int n, const hipblasDoubleComplex *x, int incx, hipblasDoubleComplex *y, int incy) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasScopyBatched (hipblasHandle_t handle, int n, const float *const x[], int incx, float *const y[], int incy, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDcopyBatched (hipblasHandle_t handle, int n, const double *const x[], int incx, double *const y[], int incy, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasCcopyBatched (hipblasHandle_t handle, int n, const hipblasComplex *const x[], int incx, hipblasComplex *const y[], int incy, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasZcopyBatched (hipblasHandle_t handle, int n, const hipblasDoubleComplex *const x[], int incx, hipblasDoubleComplex *const y[], int incy, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasScopyStridedBatched (hipblasHandle_t handle, int n, const float *x, int incx, hipblasStride stridex, float *y, int incy, hipblasStride stridey, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDcopyStridedBatched (hipblasHandle_t handle, int n, const double *x, int incx, hipblasStride stridex, double *y, int incy, hipblasStride stridey, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasCcopyStridedBatched (hipblasHandle_t handle, int n, const hipblasComplex *x, int incx, hipblasStride stridex, hipblasComplex *y, int incy, hipblasStride stridey, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasZcopyStridedBatched (hipblasHandle_t handle, int n, const hipblasDoubleComplex *x, int incx, hipblasStride stridex, hipblasDoubleComplex *y, int incy, hipblasStride stridey, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasHdot (hipblasHandle_t handle, int n, const hipblasHalf *x, int incx, const hipblasHalf *y, int incy, hipblasHalf *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasBfdot (hipblasHandle_t handle, int n, const hipblasBfloat16 *x, int incx, const hipblasBfloat16 *y, int incy, hipblasBfloat16 *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasSdot (hipblasHandle_t handle, int n, const float *x, int incx, const float *y, int incy, float *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDdot (hipblasHandle_t handle, int n, const double *x, int incx, const double *y, int incy, double *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasCdotc (hipblasHandle_t handle, int n, const hipblasComplex *x, int incx, const hipblasComplex *y, int incy, hipblasComplex *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasCdotu (hipblasHandle_t handle, int n, const hipblasComplex *x, int incx, const hipblasComplex *y, int incy, hipblasComplex *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasZdotc (hipblasHandle_t handle, int n, const hipblasDoubleComplex *x, int incx, const hipblasDoubleComplex *y, int incy, hipblasDoubleComplex *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasZdotu (hipblasHandle_t handle, int n, const hipblasDoubleComplex *x, int incx, const hipblasDoubleComplex *y, int incy, hipblasDoubleComplex *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasHdotBatched (hipblasHandle_t handle, int n, const hipblasHalf *const x[], int incx, const hipblasHalf *const y[], int incy, int batchCount, hipblasHalf *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasBfdotBatched (hipblasHandle_t handle, int n, const hipblasBfloat16 *const x[], int incx, const hipblasBfloat16 *const y[], int incy, int batchCount, hipblasBfloat16 *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasSdotBatched (hipblasHandle_t handle, int n, const float *const x[], int incx, const float *const y[], int incy, int batchCount, float *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDdotBatched (hipblasHandle_t handle, int n, const double *const x[], int incx, const double *const y[], int incy, int batchCount, double *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasCdotcBatched (hipblasHandle_t handle, int n, const hipblasComplex *const x[], int incx, const hipblasComplex *const y[], int incy, int batchCount, hipblasComplex *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasCdotuBatched (hipblasHandle_t handle, int n, const hipblasComplex *const x[], int incx, const hipblasComplex *const y[], int incy, int batchCount, hipblasComplex *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasZdotcBatched (hipblasHandle_t handle, int n, const hipblasDoubleComplex *const x[], int incx, const hipblasDoubleComplex *const y[], int incy, int batchCount, hipblasDoubleComplex *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasZdotuBatched (hipblasHandle_t handle, int n, const hipblasDoubleComplex *const x[], int incx, const hipblasDoubleComplex *const y[], int incy, int batchCount, hipblasDoubleComplex *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasHdotStridedBatched (hipblasHandle_t handle, int n, const hipblasHalf *x, int incx, hipblasStride stridex, const hipblasHalf *y, int incy, hipblasStride stridey, int batchCount, hipblasHalf *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasBfdotStridedBatched (hipblasHandle_t handle, int n, const hipblasBfloat16 *x, int incx, hipblasStride stridex, const hipblasBfloat16 *y, int incy, hipblasStride stridey, int batchCount, hipblasBfloat16 *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasSdotStridedBatched (hipblasHandle_t handle, int n, const float *x, int incx, hipblasStride stridex, const float *y, int incy, hipblasStride stridey, int batchCount, float *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDdotStridedBatched (hipblasHandle_t handle, int n, const double *x, int incx, hipblasStride stridex, const double *y, int incy, hipblasStride stridey, int batchCount, double *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasCdotcStridedBatched (hipblasHandle_t handle, int n, const hipblasComplex *x, int incx, hipblasStride stridex, const hipblasComplex *y, int incy, hipblasStride stridey, int batchCount, hipblasComplex *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasCdotuStridedBatched (hipblasHandle_t handle, int n, const hipblasComplex *x, int incx, hipblasStride stridex, const hipblasComplex *y, int incy, hipblasStride stridey, int batchCount, hipblasComplex *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasZdotcStridedBatched (hipblasHandle_t handle, int n, const hipblasDoubleComplex *x, int incx, hipblasStride stridex, const hipblasDoubleComplex *y, int incy, hipblasStride stridey, int batchCount, hipblasDoubleComplex *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasZdotuStridedBatched (hipblasHandle_t handle, int n, const hipblasDoubleComplex *x, int incx, hipblasStride stridex, const hipblasDoubleComplex *y, int incy, hipblasStride stridey, int batchCount, hipblasDoubleComplex *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasSnrm2 (hipblasHandle_t handle, int n, const float *x, int incx, float *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDnrm2 (hipblasHandle_t handle, int n, const double *x, int incx, double *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasScnrm2 (hipblasHandle_t handle, int n, const hipblasComplex *x, int incx, float *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDznrm2 (hipblasHandle_t handle, int n, const hipblasDoubleComplex *x, int incx, double *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasSnrm2Batched (hipblasHandle_t handle, int n, const float *const x[], int incx, int batchCount, float *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDnrm2Batched (hipblasHandle_t handle, int n, const double *const x[], int incx, int batchCount, double *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasScnrm2Batched (hipblasHandle_t handle, int n, const hipblasComplex *const x[], int incx, int batchCount, float *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDznrm2Batched (hipblasHandle_t handle, int n, const hipblasDoubleComplex *const x[], int incx, int batchCount, double *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasSnrm2StridedBatched (hipblasHandle_t handle, int n, const float *x, int incx, hipblasStride stridex, int batchCount, float *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDnrm2StridedBatched (hipblasHandle_t handle, int n, const double *x, int incx, hipblasStride stridex, int batchCount, double *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasScnrm2StridedBatched (hipblasHandle_t handle, int n, const hipblasComplex *x, int incx, hipblasStride stridex, int batchCount, float *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDznrm2StridedBatched (hipblasHandle_t handle, int n, const hipblasDoubleComplex *x, int incx, hipblasStride stridex, int batchCount, double *result) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasSrot (hipblasHandle_t handle, int n, float *x, int incx, float *y, int incy, const float *c, const float *s) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDrot (hipblasHandle_t handle, int n, double *x, int incx, double *y, int incy, const double *c, const double *s) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasCrot (hipblasHandle_t handle, int n, hipblasComplex *x, int incx, hipblasComplex *y, int incy, const float *c, const hipblasComplex *s) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasCsrot (hipblasHandle_t handle, int n, hipblasComplex *x, int incx, hipblasComplex *y, int incy, const float *c, const float *s) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasZrot (hipblasHandle_t handle, int n, hipblasDoubleComplex *x, int incx, hipblasDoubleComplex *y, int incy, const double *c, const hipblasDoubleComplex *s) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasZdrot (hipblasHandle_t handle, int n, hipblasDoubleComplex *x, int incx, hipblasDoubleComplex *y, int incy, const double *c, const double *s) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasSrotBatched (hipblasHandle_t handle, int n, float *const x[], int incx, float *const y[], int incy, const float *c, const float *s, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDrotBatched (hipblasHandle_t handle, int n, double *const x[], int incx, double *const y[], int incy, const double *c, const double *s, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasCrotBatched (hipblasHandle_t handle, int n, hipblasComplex *const x[], int incx, hipblasComplex *const y[], int incy, const float *c, const hipblasComplex *s, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasCsrotBatched (hipblasHandle_t handle, int n, hipblasComplex *const x[], int incx, hipblasComplex *const y[], int incy, const float *c, const float *s, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasZrotBatched (hipblasHandle_t handle, int n, hipblasDoubleComplex *const x[], int incx, hipblasDoubleComplex *const y[], int incy, const double *c, const hipblasDoubleComplex *s, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasZdrotBatched (hipblasHandle_t handle, int n, hipblasDoubleComplex *const x[], int incx, hipblasDoubleComplex *const y[], int incy, const double *c, const double *s, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasSrotStridedBatched (hipblasHandle_t handle, int n, float *x, int incx, hipblasStride stridex, float *y, int incy, hipblasStride stridey, const float *c, const float *s, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDrotStridedBatched (hipblasHandle_t handle, int n, double *x, int incx, hipblasStride stridex, double *y, int incy, hipblasStride stridey, const double *c, const double *s, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasCrotStridedBatched (hipblasHandle_t handle, int n, hipblasComplex *x, int incx, hipblasStride stridex, hipblasComplex *y, int incy, hipblasStride stridey, const float *c, const hipblasComplex *s, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasCsrotStridedBatched (hipblasHandle_t handle, int n, hipblasComplex *x, int incx, hipblasStride stridex, hipblasComplex *y, int incy, hipblasStride stridey, const float *c, const float *s, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasZrotStridedBatched (hipblasHandle_t handle, int n, hipblasDoubleComplex *x, int incx, hipblasStride stridex, hipblasDoubleComplex *y, int incy, hipblasStride stridey, const double *c, const hipblasDoubleComplex *s, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasZdrotStridedBatched (hipblasHandle_t handle, int n, hipblasDoubleComplex *x, int incx, hipblasStride stridex, hipblasDoubleComplex *y, int incy, hipblasStride stridey, const double *c, const double *s, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasSrotg (hipblasHandle_t handle, float *a, float *b, float *c, float *s) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDrotg (hipblasHandle_t handle, double *a, double *b, double *c, double *s) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasCrotg (hipblasHandle_t handle, hipblasComplex *a, hipblasComplex *b, float *c, hipblasComplex *s) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasZrotg (hipblasHandle_t handle, hipblasDoubleComplex *a, hipblasDoubleComplex *b, double *c, hipblasDoubleComplex *s) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasSrotgBatched (hipblasHandle_t handle, float *const a[], float *const b[], float *const c[], float *const s[], int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDrotgBatched (hipblasHandle_t handle, double *const a[], double *const b[], double *const c[], double *const s[], int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasCrotgBatched (hipblasHandle_t handle, hipblasComplex *const a[], hipblasComplex *const b[], float *const c[], hipblasComplex *const s[], int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasZrotgBatched (hipblasHandle_t handle, hipblasDoubleComplex *const a[], hipblasDoubleComplex *const b[], double *const c[], hipblasDoubleComplex *const s[], int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasSrotgStridedBatched (hipblasHandle_t handle, float *a, hipblasStride stridea, float *b, hipblasStride strideb, float *c, hipblasStride stridec, float *s, hipblasStride strides, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDrotgStridedBatched (hipblasHandle_t handle, double *a, hipblasStride stridea, double *b, hipblasStride strideb, double *c, hipblasStride stridec, double *s, hipblasStride strides, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasCrotgStridedBatched (hipblasHandle_t handle, hipblasComplex *a, hipblasStride stridea, hipblasComplex *b, hipblasStride strideb, float *c, hipblasStride stridec, hipblasComplex *s, hipblasStride strides, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasZrotgStridedBatched (hipblasHandle_t handle, hipblasDoubleComplex *a, hipblasStride stridea, hipblasDoubleComplex *b, hipblasStride strideb, double *c, hipblasStride stridec, hipblasDoubleComplex *s, hipblasStride strides, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasSrotm (hipblasHandle_t handle, int n, float *x, int incx, float *y, int incy, const float *param) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDrotm (hipblasHandle_t handle, int n, double *x, int incx, double *y, int incy, const double *param) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasSrotmBatched (hipblasHandle_t handle, int n, float *const x[], int incx, float *const y[], int incy, const float *const param[], int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDrotmBatched (hipblasHandle_t handle, int n, double *const x[], int incx, double *const y[], int incy, const double *const param[], int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasSrotmStridedBatched (hipblasHandle_t handle, int n, float *x, int incx, hipblasStride stridex, float *y, int incy, hipblasStride stridey, const float *param, hipblasStride strideParam, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDrotmStridedBatched (hipblasHandle_t handle, int n, double *x, int incx, hipblasStride stridex, double *y, int incy, hipblasStride stridey, const double *param, hipblasStride strideParam, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasSrotmg (hipblasHandle_t handle, float *d1, float *d2, float *x1, const float *y1, float *param) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDrotmg (hipblasHandle_t handle, double *d1, double *d2, double *x1, const double *y1, double *param) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasSrotmgBatched (hipblasHandle_t handle, float *const d1[], float *const d2[], float *const x1[], const float *const y1[], float *const param[], int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDrotmgBatched (hipblasHandle_t handle, double *const d1[], double *const d2[], double *const x1[], const double *const y1[], double *const param[], int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasSrotmgStridedBatched (hipblasHandle_t handle, float *d1, hipblasStride strided1, float *d2, hipblasStride strided2, float *x1, hipblasStride stridex1, const float *y1, hipblasStride stridey1, float *param, hipblasStride strideParam, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDrotmgStridedBatched (hipblasHandle_t handle, double *d1, hipblasStride strided1, double *d2, hipblasStride strided2, double *x1, hipblasStride stridex1, const double *y1, hipblasStride stridey1, double *param, hipblasStride strideParam, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasSscal (hipblasHandle_t handle, int n, const float *alpha, float *x, int incx) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDscal (hipblasHandle_t handle, int n, const double *alpha, double *x, int incx) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasCscal (hipblasHandle_t handle, int n, const hipblasComplex *alpha, hipblasComplex *x, int incx) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasCsscal (hipblasHandle_t handle, int n, const float *alpha, hipblasComplex *x, int incx) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasZscal (hipblasHandle_t handle, int n, const hipblasDoubleComplex *alpha, hipblasDoubleComplex *x, int incx) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasZdscal (hipblasHandle_t handle, int n, const double *alpha, hipblasDoubleComplex *x, int incx) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasSscalBatched (hipblasHandle_t handle, int n, const float *alpha, float *const x[], int incx, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDscalBatched (hipblasHandle_t handle, int n, const double *alpha, double *const x[], int incx, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasCscalBatched (hipblasHandle_t handle, int n, const hipblasComplex *alpha, hipblasComplex *const x[], int incx, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasZscalBatched (hipblasHandle_t handle, int n, const hipblasDoubleComplex *alpha, hipblasDoubleComplex *const x[], int incx, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasCsscalBatched (hipblasHandle_t handle, int n, const float *alpha, hipblasComplex *const x[], int incx, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasZdscalBatched (hipblasHandle_t handle, int n, const double *alpha, hipblasDoubleComplex *const x[], int incx, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasSscalStridedBatched (hipblasHandle_t handle, int n, const float *alpha, float *x, int incx, hipblasStride stridex, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDscalStridedBatched (hipblasHandle_t handle, int n, const double *alpha, double *x, int incx, hipblasStride stridex, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasCscalStridedBatched (hipblasHandle_t handle, int n, const hipblasComplex *alpha, hipblasComplex *x, int incx, hipblasStride stridex, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasZscalStridedBatched (hipblasHandle_t handle, int n, const hipblasDoubleComplex *alpha, hipblasDoubleComplex *x, int incx, hipblasStride stridex, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasCsscalStridedBatched (hipblasHandle_t handle, int n, const float *alpha, hipblasComplex *x, int incx, hipblasStride stridex, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasZdscalStridedBatched (hipblasHandle_t handle, int n, const double *alpha, hipblasDoubleComplex *x, int incx, hipblasStride stridex, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasSswap (hipblasHandle_t handle, int n, float *x, int incx, float *y, int incy) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDswap (hipblasHandle_t handle, int n, double *x, int incx, double *y, int incy) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasCswap (hipblasHandle_t handle, int n, hipblasComplex *x, int incx, hipblasComplex *y, int incy) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasZswap (hipblasHandle_t handle, int n, hipblasDoubleComplex *x, int incx, hipblasDoubleComplex *y, int incy) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasSswapBatched (hipblasHandle_t handle, int n, float *const x[], int incx, float *const y[], int incy, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDswapBatched (hipblasHandle_t handle, int n, double *const x[], int incx, double *const y[], int incy, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasCswapBatched (hipblasHandle_t handle, int n, hipblasComplex *const x[], int incx, hipblasComplex *const y[], int incy, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasZswapBatched (hipblasHandle_t handle, int n, hipblasDoubleComplex *const x[], int incx, hipblasDoubleComplex *const y[], int incy, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasSswapStridedBatched (hipblasHandle_t handle, int n, float *x, int incx, hipblasStride stridex, float *y, int incy, hipblasStride stridey, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasDswapStridedBatched (hipblasHandle_t handle, int n, double *x, int incx, hipblasStride stridex, double *y, int incy, hipblasStride stridey, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasCswapStridedBatched (hipblasHandle_t handle, int n, hipblasComplex *x, int incx, hipblasStride stridex, hipblasComplex *y, int incy, hipblasStride stridey, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasZswapStridedBatched (hipblasHandle_t handle, int n, hipblasDoubleComplex *x, int incx, hipblasStride stridex, hipblasDoubleComplex *y, int incy, hipblasStride stridey, int batchCount) |
| BLAS Level 1 API. More... | |
| hipblasStatus_t | hipblasSgbmv (hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, int kl, int ku, const float *alpha, const float *AP, int lda, const float *x, int incx, const float *beta, float *y, int incy) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDgbmv (hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, int kl, int ku, const double *alpha, const double *AP, int lda, const double *x, int incx, const double *beta, double *y, int incy) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCgbmv (hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, int kl, int ku, const hipblasComplex *alpha, const hipblasComplex *AP, int lda, const hipblasComplex *x, int incx, const hipblasComplex *beta, hipblasComplex *y, int incy) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZgbmv (hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, int kl, int ku, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *AP, int lda, const hipblasDoubleComplex *x, int incx, const hipblasDoubleComplex *beta, hipblasDoubleComplex *y, int incy) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasSgbmvBatched (hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, int kl, int ku, const float *alpha, const float *const AP[], int lda, const float *const x[], int incx, const float *beta, float *const y[], int incy, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDgbmvBatched (hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, int kl, int ku, const double *alpha, const double *const AP[], int lda, const double *const x[], int incx, const double *beta, double *const y[], int incy, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCgbmvBatched (hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, int kl, int ku, const hipblasComplex *alpha, const hipblasComplex *const AP[], int lda, const hipblasComplex *const x[], int incx, const hipblasComplex *beta, hipblasComplex *const y[], int incy, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZgbmvBatched (hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, int kl, int ku, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *const AP[], int lda, const hipblasDoubleComplex *const x[], int incx, const hipblasDoubleComplex *beta, hipblasDoubleComplex *const y[], int incy, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasSgbmvStridedBatched (hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, int kl, int ku, const float *alpha, const float *AP, int lda, hipblasStride strideA, const float *x, int incx, hipblasStride stridex, const float *beta, float *y, int incy, hipblasStride stridey, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDgbmvStridedBatched (hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, int kl, int ku, const double *alpha, const double *AP, int lda, hipblasStride strideA, const double *x, int incx, hipblasStride stridex, const double *beta, double *y, int incy, hipblasStride stridey, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCgbmvStridedBatched (hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, int kl, int ku, const hipblasComplex *alpha, const hipblasComplex *AP, int lda, hipblasStride strideA, const hipblasComplex *x, int incx, hipblasStride stridex, const hipblasComplex *beta, hipblasComplex *y, int incy, hipblasStride stridey, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZgbmvStridedBatched (hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, int kl, int ku, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *AP, int lda, hipblasStride strideA, const hipblasDoubleComplex *x, int incx, hipblasStride stridex, const hipblasDoubleComplex *beta, hipblasDoubleComplex *y, int incy, hipblasStride stridey, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasSgemv (hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, const float *alpha, const float *AP, int lda, const float *x, int incx, const float *beta, float *y, int incy) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDgemv (hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, const double *alpha, const double *AP, int lda, const double *x, int incx, const double *beta, double *y, int incy) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCgemv (hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, const hipblasComplex *alpha, const hipblasComplex *AP, int lda, const hipblasComplex *x, int incx, const hipblasComplex *beta, hipblasComplex *y, int incy) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZgemv (hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *AP, int lda, const hipblasDoubleComplex *x, int incx, const hipblasDoubleComplex *beta, hipblasDoubleComplex *y, int incy) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasSgemvBatched (hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, const float *alpha, const float *const AP[], int lda, const float *const x[], int incx, const float *beta, float *const y[], int incy, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDgemvBatched (hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, const double *alpha, const double *const AP[], int lda, const double *const x[], int incx, const double *beta, double *const y[], int incy, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCgemvBatched (hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, const hipblasComplex *alpha, const hipblasComplex *const AP[], int lda, const hipblasComplex *const x[], int incx, const hipblasComplex *beta, hipblasComplex *const y[], int incy, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZgemvBatched (hipblasHandle_t handle, hipblasOperation_t trans, int m, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *const AP[], int lda, const hipblasDoubleComplex *const x[], int incx, const hipblasDoubleComplex *beta, hipblasDoubleComplex *const y[], int incy, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasSgemvStridedBatched (hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const float *alpha, const float *AP, int lda, hipblasStride strideA, const float *x, int incx, hipblasStride stridex, const float *beta, float *y, int incy, hipblasStride stridey, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDgemvStridedBatched (hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const double *alpha, const double *AP, int lda, hipblasStride strideA, const double *x, int incx, hipblasStride stridex, const double *beta, double *y, int incy, hipblasStride stridey, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCgemvStridedBatched (hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const hipblasComplex *alpha, const hipblasComplex *AP, int lda, hipblasStride strideA, const hipblasComplex *x, int incx, hipblasStride stridex, const hipblasComplex *beta, hipblasComplex *y, int incy, hipblasStride stridey, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZgemvStridedBatched (hipblasHandle_t handle, hipblasOperation_t transA, int m, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *AP, int lda, hipblasStride strideA, const hipblasDoubleComplex *x, int incx, hipblasStride stridex, const hipblasDoubleComplex *beta, hipblasDoubleComplex *y, int incy, hipblasStride stridey, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasSger (hipblasHandle_t handle, int m, int n, const float *alpha, const float *x, int incx, const float *y, int incy, float *AP, int lda) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDger (hipblasHandle_t handle, int m, int n, const double *alpha, const double *x, int incx, const double *y, int incy, double *AP, int lda) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCgeru (hipblasHandle_t handle, int m, int n, const hipblasComplex *alpha, const hipblasComplex *x, int incx, const hipblasComplex *y, int incy, hipblasComplex *AP, int lda) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCgerc (hipblasHandle_t handle, int m, int n, const hipblasComplex *alpha, const hipblasComplex *x, int incx, const hipblasComplex *y, int incy, hipblasComplex *AP, int lda) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZgeru (hipblasHandle_t handle, int m, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *x, int incx, const hipblasDoubleComplex *y, int incy, hipblasDoubleComplex *AP, int lda) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZgerc (hipblasHandle_t handle, int m, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *x, int incx, const hipblasDoubleComplex *y, int incy, hipblasDoubleComplex *AP, int lda) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasSgerBatched (hipblasHandle_t handle, int m, int n, const float *alpha, const float *const x[], int incx, const float *const y[], int incy, float *const AP[], int lda, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDgerBatched (hipblasHandle_t handle, int m, int n, const double *alpha, const double *const x[], int incx, const double *const y[], int incy, double *const AP[], int lda, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCgeruBatched (hipblasHandle_t handle, int m, int n, const hipblasComplex *alpha, const hipblasComplex *const x[], int incx, const hipblasComplex *const y[], int incy, hipblasComplex *const AP[], int lda, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCgercBatched (hipblasHandle_t handle, int m, int n, const hipblasComplex *alpha, const hipblasComplex *const x[], int incx, const hipblasComplex *const y[], int incy, hipblasComplex *const AP[], int lda, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZgeruBatched (hipblasHandle_t handle, int m, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *const x[], int incx, const hipblasDoubleComplex *const y[], int incy, hipblasDoubleComplex *const AP[], int lda, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZgercBatched (hipblasHandle_t handle, int m, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *const x[], int incx, const hipblasDoubleComplex *const y[], int incy, hipblasDoubleComplex *const AP[], int lda, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasSgerStridedBatched (hipblasHandle_t handle, int m, int n, const float *alpha, const float *x, int incx, hipblasStride stridex, const float *y, int incy, hipblasStride stridey, float *AP, int lda, hipblasStride strideA, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDgerStridedBatched (hipblasHandle_t handle, int m, int n, const double *alpha, const double *x, int incx, hipblasStride stridex, const double *y, int incy, hipblasStride stridey, double *AP, int lda, hipblasStride strideA, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCgeruStridedBatched (hipblasHandle_t handle, int m, int n, const hipblasComplex *alpha, const hipblasComplex *x, int incx, hipblasStride stridex, const hipblasComplex *y, int incy, hipblasStride stridey, hipblasComplex *AP, int lda, hipblasStride strideA, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCgercStridedBatched (hipblasHandle_t handle, int m, int n, const hipblasComplex *alpha, const hipblasComplex *x, int incx, hipblasStride stridex, const hipblasComplex *y, int incy, hipblasStride stridey, hipblasComplex *AP, int lda, hipblasStride strideA, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZgeruStridedBatched (hipblasHandle_t handle, int m, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *x, int incx, hipblasStride stridex, const hipblasDoubleComplex *y, int incy, hipblasStride stridey, hipblasDoubleComplex *AP, int lda, hipblasStride strideA, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZgercStridedBatched (hipblasHandle_t handle, int m, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *x, int incx, hipblasStride stridex, const hipblasDoubleComplex *y, int incy, hipblasStride stridey, hipblasDoubleComplex *AP, int lda, hipblasStride strideA, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasChbmv (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const hipblasComplex *alpha, const hipblasComplex *AP, int lda, const hipblasComplex *x, int incx, const hipblasComplex *beta, hipblasComplex *y, int incy) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZhbmv (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *AP, int lda, const hipblasDoubleComplex *x, int incx, const hipblasDoubleComplex *beta, hipblasDoubleComplex *y, int incy) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasChbmvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const hipblasComplex *alpha, const hipblasComplex *const AP[], int lda, const hipblasComplex *const x[], int incx, const hipblasComplex *beta, hipblasComplex *const y[], int incy, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZhbmvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *const AP[], int lda, const hipblasDoubleComplex *const x[], int incx, const hipblasDoubleComplex *beta, hipblasDoubleComplex *const y[], int incy, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasChbmvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const hipblasComplex *alpha, const hipblasComplex *AP, int lda, hipblasStride strideA, const hipblasComplex *x, int incx, hipblasStride stridex, const hipblasComplex *beta, hipblasComplex *y, int incy, hipblasStride stridey, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZhbmvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *AP, int lda, hipblasStride strideA, const hipblasDoubleComplex *x, int incx, hipblasStride stridex, const hipblasDoubleComplex *beta, hipblasDoubleComplex *y, int incy, hipblasStride stridey, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasChemv (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex *alpha, const hipblasComplex *AP, int lda, const hipblasComplex *x, int incx, const hipblasComplex *beta, hipblasComplex *y, int incy) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZhemv (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *AP, int lda, const hipblasDoubleComplex *x, int incx, const hipblasDoubleComplex *beta, hipblasDoubleComplex *y, int incy) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasChemvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex *alpha, const hipblasComplex *const AP[], int lda, const hipblasComplex *const x[], int incx, const hipblasComplex *beta, hipblasComplex *const y[], int incy, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZhemvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *const AP[], int lda, const hipblasDoubleComplex *const x[], int incx, const hipblasDoubleComplex *beta, hipblasDoubleComplex *const y[], int incy, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasChemvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex *alpha, const hipblasComplex *AP, int lda, hipblasStride strideA, const hipblasComplex *x, int incx, hipblasStride stridex, const hipblasComplex *beta, hipblasComplex *y, int incy, hipblasStride stridey, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZhemvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *AP, int lda, hipblasStride strideA, const hipblasDoubleComplex *x, int incx, hipblasStride stridex, const hipblasDoubleComplex *beta, hipblasDoubleComplex *y, int incy, hipblasStride stridey, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCher (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float *alpha, const hipblasComplex *x, int incx, hipblasComplex *AP, int lda) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZher (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double *alpha, const hipblasDoubleComplex *x, int incx, hipblasDoubleComplex *AP, int lda) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCherBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float *alpha, const hipblasComplex *const x[], int incx, hipblasComplex *const AP[], int lda, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZherBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double *alpha, const hipblasDoubleComplex *const x[], int incx, hipblasDoubleComplex *const AP[], int lda, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCherStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float *alpha, const hipblasComplex *x, int incx, hipblasStride stridex, hipblasComplex *AP, int lda, hipblasStride strideA, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZherStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double *alpha, const hipblasDoubleComplex *x, int incx, hipblasStride stridex, hipblasDoubleComplex *AP, int lda, hipblasStride strideA, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCher2 (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex *alpha, const hipblasComplex *x, int incx, const hipblasComplex *y, int incy, hipblasComplex *AP, int lda) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZher2 (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *x, int incx, const hipblasDoubleComplex *y, int incy, hipblasDoubleComplex *AP, int lda) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCher2Batched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex *alpha, const hipblasComplex *const x[], int incx, const hipblasComplex *const y[], int incy, hipblasComplex *const AP[], int lda, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZher2Batched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *const x[], int incx, const hipblasDoubleComplex *const y[], int incy, hipblasDoubleComplex *const AP[], int lda, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCher2StridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex *alpha, const hipblasComplex *x, int incx, hipblasStride stridex, const hipblasComplex *y, int incy, hipblasStride stridey, hipblasComplex *AP, int lda, hipblasStride strideA, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZher2StridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *x, int incx, hipblasStride stridex, const hipblasDoubleComplex *y, int incy, hipblasStride stridey, hipblasDoubleComplex *AP, int lda, hipblasStride strideA, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasChpmv (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex *alpha, const hipblasComplex *AP, const hipblasComplex *x, int incx, const hipblasComplex *beta, hipblasComplex *y, int incy) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZhpmv (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *AP, const hipblasDoubleComplex *x, int incx, const hipblasDoubleComplex *beta, hipblasDoubleComplex *y, int incy) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasChpmvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex *alpha, const hipblasComplex *const AP[], const hipblasComplex *const x[], int incx, const hipblasComplex *beta, hipblasComplex *const y[], int incy, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZhpmvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *const AP[], const hipblasDoubleComplex *const x[], int incx, const hipblasDoubleComplex *beta, hipblasDoubleComplex *const y[], int incy, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasChpmvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex *alpha, const hipblasComplex *AP, hipblasStride strideA, const hipblasComplex *x, int incx, hipblasStride stridex, const hipblasComplex *beta, hipblasComplex *y, int incy, hipblasStride stridey, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZhpmvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *AP, hipblasStride strideA, const hipblasDoubleComplex *x, int incx, hipblasStride stridex, const hipblasDoubleComplex *beta, hipblasDoubleComplex *y, int incy, hipblasStride stridey, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasChpr (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float *alpha, const hipblasComplex *x, int incx, hipblasComplex *AP) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZhpr (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double *alpha, const hipblasDoubleComplex *x, int incx, hipblasDoubleComplex *AP) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasChprBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float *alpha, const hipblasComplex *const x[], int incx, hipblasComplex *const AP[], int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZhprBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double *alpha, const hipblasDoubleComplex *const x[], int incx, hipblasDoubleComplex *const AP[], int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasChprStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float *alpha, const hipblasComplex *x, int incx, hipblasStride stridex, hipblasComplex *AP, hipblasStride strideA, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZhprStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double *alpha, const hipblasDoubleComplex *x, int incx, hipblasStride stridex, hipblasDoubleComplex *AP, hipblasStride strideA, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasChpr2 (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex *alpha, const hipblasComplex *x, int incx, const hipblasComplex *y, int incy, hipblasComplex *AP) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZhpr2 (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *x, int incx, const hipblasDoubleComplex *y, int incy, hipblasDoubleComplex *AP) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasChpr2Batched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex *alpha, const hipblasComplex *const x[], int incx, const hipblasComplex *const y[], int incy, hipblasComplex *const AP[], int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZhpr2Batched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *const x[], int incx, const hipblasDoubleComplex *const y[], int incy, hipblasDoubleComplex *const AP[], int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasChpr2StridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex *alpha, const hipblasComplex *x, int incx, hipblasStride stridex, const hipblasComplex *y, int incy, hipblasStride stridey, hipblasComplex *AP, hipblasStride strideA, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZhpr2StridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *x, int incx, hipblasStride stridex, const hipblasDoubleComplex *y, int incy, hipblasStride stridey, hipblasDoubleComplex *AP, hipblasStride strideA, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasSsbmv (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const float *alpha, const float *AP, int lda, const float *x, int incx, const float *beta, float *y, int incy) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDsbmv (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const double *alpha, const double *AP, int lda, const double *x, int incx, const double *beta, double *y, int incy) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasSsbmvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const float *alpha, const float *const AP[], int lda, const float *const x[], int incx, const float *beta, float *const y[], int incy, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDsbmvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const double *alpha, const double *const AP[], int lda, const double *const x[], int incx, const double *beta, double *const y[], int incy, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasSsbmvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const float *alpha, const float *AP, int lda, hipblasStride strideA, const float *x, int incx, hipblasStride stridex, const float *beta, float *y, int incy, hipblasStride stridey, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDsbmvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, int k, const double *alpha, const double *AP, int lda, hipblasStride strideA, const double *x, int incx, hipblasStride stridex, const double *beta, double *y, int incy, hipblasStride stridey, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasSspmv (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float *alpha, const float *AP, const float *x, int incx, const float *beta, float *y, int incy) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDspmv (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double *alpha, const double *AP, const double *x, int incx, const double *beta, double *y, int incy) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasSspmvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float *alpha, const float *const AP[], const float *const x[], int incx, const float *beta, float *const y[], int incy, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDspmvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double *alpha, const double *const AP[], const double *const x[], int incx, const double *beta, double *const y[], int incy, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasSspmvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float *alpha, const float *AP, hipblasStride strideA, const float *x, int incx, hipblasStride stridex, const float *beta, float *y, int incy, hipblasStride stridey, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDspmvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double *alpha, const double *AP, hipblasStride strideA, const double *x, int incx, hipblasStride stridex, const double *beta, double *y, int incy, hipblasStride stridey, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasSspr (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float *alpha, const float *x, int incx, float *AP) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDspr (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double *alpha, const double *x, int incx, double *AP) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCspr (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex *alpha, const hipblasComplex *x, int incx, hipblasComplex *AP) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZspr (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *x, int incx, hipblasDoubleComplex *AP) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasSsprBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float *alpha, const float *const x[], int incx, float *const AP[], int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDsprBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double *alpha, const double *const x[], int incx, double *const AP[], int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCsprBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex *alpha, const hipblasComplex *const x[], int incx, hipblasComplex *const AP[], int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZsprBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *const x[], int incx, hipblasDoubleComplex *const AP[], int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasSsprStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float *alpha, const float *x, int incx, hipblasStride stridex, float *AP, hipblasStride strideA, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDsprStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double *alpha, const double *x, int incx, hipblasStride stridex, double *AP, hipblasStride strideA, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCsprStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex *alpha, const hipblasComplex *x, int incx, hipblasStride stridex, hipblasComplex *AP, hipblasStride strideA, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZsprStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *x, int incx, hipblasStride stridex, hipblasDoubleComplex *AP, hipblasStride strideA, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasSspr2 (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float *alpha, const float *x, int incx, const float *y, int incy, float *AP) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDspr2 (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double *alpha, const double *x, int incx, const double *y, int incy, double *AP) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasSspr2Batched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float *alpha, const float *const x[], int incx, const float *const y[], int incy, float *const AP[], int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDspr2Batched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double *alpha, const double *const x[], int incx, const double *const y[], int incy, double *const AP[], int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasSspr2StridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float *alpha, const float *x, int incx, hipblasStride stridex, const float *y, int incy, hipblasStride stridey, float *AP, hipblasStride strideA, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDspr2StridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double *alpha, const double *x, int incx, hipblasStride stridex, const double *y, int incy, hipblasStride stridey, double *AP, hipblasStride strideA, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasSsymv (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float *alpha, const float *AP, int lda, const float *x, int incx, const float *beta, float *y, int incy) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDsymv (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double *alpha, const double *AP, int lda, const double *x, int incx, const double *beta, double *y, int incy) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCsymv (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex *alpha, const hipblasComplex *AP, int lda, const hipblasComplex *x, int incx, const hipblasComplex *beta, hipblasComplex *y, int incy) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZsymv (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *AP, int lda, const hipblasDoubleComplex *x, int incx, const hipblasDoubleComplex *beta, hipblasDoubleComplex *y, int incy) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasSsymvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float *alpha, const float *const AP[], int lda, const float *const x[], int incx, const float *beta, float *const y[], int incy, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDsymvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double *alpha, const double *const AP[], int lda, const double *const x[], int incx, const double *beta, double *const y[], int incy, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCsymvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex *alpha, const hipblasComplex *const AP[], int lda, const hipblasComplex *const x[], int incx, const hipblasComplex *beta, hipblasComplex *const y[], int incy, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZsymvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *const AP[], int lda, const hipblasDoubleComplex *const x[], int incx, const hipblasDoubleComplex *beta, hipblasDoubleComplex *const y[], int incy, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasSsymvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float *alpha, const float *AP, int lda, hipblasStride strideA, const float *x, int incx, hipblasStride stridex, const float *beta, float *y, int incy, hipblasStride stridey, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDsymvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double *alpha, const double *AP, int lda, hipblasStride strideA, const double *x, int incx, hipblasStride stridex, const double *beta, double *y, int incy, hipblasStride stridey, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCsymvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex *alpha, const hipblasComplex *AP, int lda, hipblasStride strideA, const hipblasComplex *x, int incx, hipblasStride stridex, const hipblasComplex *beta, hipblasComplex *y, int incy, hipblasStride stridey, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZsymvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *AP, int lda, hipblasStride strideA, const hipblasDoubleComplex *x, int incx, hipblasStride stridex, const hipblasDoubleComplex *beta, hipblasDoubleComplex *y, int incy, hipblasStride stridey, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasSsyr (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float *alpha, const float *x, int incx, float *AP, int lda) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDsyr (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double *alpha, const double *x, int incx, double *AP, int lda) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCsyr (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex *alpha, const hipblasComplex *x, int incx, hipblasComplex *AP, int lda) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZsyr (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *x, int incx, hipblasDoubleComplex *AP, int lda) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasSsyrBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float *alpha, const float *const x[], int incx, float *const AP[], int lda, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDsyrBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double *alpha, const double *const x[], int incx, double *const AP[], int lda, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCsyrBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex *alpha, const hipblasComplex *const x[], int incx, hipblasComplex *const AP[], int lda, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZsyrBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *const x[], int incx, hipblasDoubleComplex *const AP[], int lda, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasSsyrStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float *alpha, const float *x, int incx, hipblasStride stridex, float *AP, int lda, hipblasStride strideA, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDsyrStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double *alpha, const double *x, int incx, hipblasStride stridex, double *AP, int lda, hipblasStride strideA, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCsyrStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex *alpha, const hipblasComplex *x, int incx, hipblasStride stridex, hipblasComplex *AP, int lda, hipblasStride strideA, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZsyrStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *x, int incx, hipblasStride stridex, hipblasDoubleComplex *AP, int lda, hipblasStride strideA, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasSsyr2 (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float *alpha, const float *x, int incx, const float *y, int incy, float *AP, int lda) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDsyr2 (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double *alpha, const double *x, int incx, const double *y, int incy, double *AP, int lda) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCsyr2 (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex *alpha, const hipblasComplex *x, int incx, const hipblasComplex *y, int incy, hipblasComplex *AP, int lda) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZsyr2 (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *x, int incx, const hipblasDoubleComplex *y, int incy, hipblasDoubleComplex *AP, int lda) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasSsyr2Batched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float *alpha, const float *const x[], int incx, const float *const y[], int incy, float *const AP[], int lda, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDsyr2Batched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double *alpha, const double *const x[], int incx, const double *const y[], int incy, double *const AP[], int lda, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCsyr2Batched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex *alpha, const hipblasComplex *const x[], int incx, const hipblasComplex *const y[], int incy, hipblasComplex *const AP[], int lda, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZsyr2Batched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *const x[], int incx, const hipblasDoubleComplex *const y[], int incy, hipblasDoubleComplex *const AP[], int lda, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasSsyr2StridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const float *alpha, const float *x, int incx, hipblasStride stridex, const float *y, int incy, hipblasStride stridey, float *AP, int lda, hipblasStride strideA, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDsyr2StridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const double *alpha, const double *x, int incx, hipblasStride stridex, const double *y, int incy, hipblasStride stridey, double *AP, int lda, hipblasStride strideA, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCsyr2StridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasComplex *alpha, const hipblasComplex *x, int incx, hipblasStride stridex, const hipblasComplex *y, int incy, hipblasStride stridey, hipblasComplex *AP, int lda, hipblasStride strideA, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZsyr2StridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *x, int incx, hipblasStride stridex, const hipblasDoubleComplex *y, int incy, hipblasStride stridey, hipblasDoubleComplex *AP, int lda, hipblasStride strideA, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasStbmv (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const float *AP, int lda, float *x, int incx) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDtbmv (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const double *AP, int lda, double *x, int incx) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCtbmv (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasComplex *AP, int lda, hipblasComplex *x, int incx) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZtbmv (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasDoubleComplex *AP, int lda, hipblasDoubleComplex *x, int incx) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasStbmvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const float *const AP[], int lda, float *const x[], int incx, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDtbmvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const double *const AP[], int lda, double *const x[], int incx, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCtbmvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasComplex *const AP[], int lda, hipblasComplex *const x[], int incx, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZtbmvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasDoubleComplex *const AP[], int lda, hipblasDoubleComplex *const x[], int incx, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasStbmvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const float *AP, int lda, hipblasStride strideA, float *x, int incx, hipblasStride stridex, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDtbmvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const double *AP, int lda, hipblasStride strideA, double *x, int incx, hipblasStride stridex, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCtbmvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasComplex *AP, int lda, hipblasStride strideA, hipblasComplex *x, int incx, hipblasStride stridex, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZtbmvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int k, const hipblasDoubleComplex *AP, int lda, hipblasStride strideA, hipblasDoubleComplex *x, int incx, hipblasStride stridex, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasStbsv (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const float *AP, int lda, float *x, int incx) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDtbsv (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const double *AP, int lda, double *x, int incx) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCtbsv (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const hipblasComplex *AP, int lda, hipblasComplex *x, int incx) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZtbsv (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const hipblasDoubleComplex *AP, int lda, hipblasDoubleComplex *x, int incx) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasStbsvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const float *const AP[], int lda, float *const x[], int incx, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDtbsvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const double *const AP[], int lda, double *const x[], int incx, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCtbsvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const hipblasComplex *const AP[], int lda, hipblasComplex *const x[], int incx, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZtbsvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const hipblasDoubleComplex *const AP[], int lda, hipblasDoubleComplex *const x[], int incx, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasStbsvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const float *AP, int lda, hipblasStride strideA, float *x, int incx, hipblasStride stridex, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDtbsvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const double *AP, int lda, hipblasStride strideA, double *x, int incx, hipblasStride stridex, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCtbsvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const hipblasComplex *AP, int lda, hipblasStride strideA, hipblasComplex *x, int incx, hipblasStride stridex, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZtbsvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int n, int k, const hipblasDoubleComplex *AP, int lda, hipblasStride strideA, hipblasDoubleComplex *x, int incx, hipblasStride stridex, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasStpmv (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float *AP, float *x, int incx) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDtpmv (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double *AP, double *x, int incx) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCtpmv (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex *AP, hipblasComplex *x, int incx) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZtpmv (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex *AP, hipblasDoubleComplex *x, int incx) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasStpmvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float *const AP[], float *const x[], int incx, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDtpmvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double *const AP[], double *const x[], int incx, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCtpmvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex *const AP[], hipblasComplex *const x[], int incx, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZtpmvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex *const AP[], hipblasDoubleComplex *const x[], int incx, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasStpmvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float *AP, hipblasStride strideA, float *x, int incx, hipblasStride stridex, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDtpmvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double *AP, hipblasStride strideA, double *x, int incx, hipblasStride stridex, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCtpmvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex *AP, hipblasStride strideA, hipblasComplex *x, int incx, hipblasStride stridex, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZtpmvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex *AP, hipblasStride strideA, hipblasDoubleComplex *x, int incx, hipblasStride stridex, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasStpsv (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float *AP, float *x, int incx) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDtpsv (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double *AP, double *x, int incx) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCtpsv (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex *AP, hipblasComplex *x, int incx) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZtpsv (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex *AP, hipblasDoubleComplex *x, int incx) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasStpsvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float *const AP[], float *const x[], int incx, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDtpsvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double *const AP[], double *const x[], int incx, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCtpsvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex *const AP[], hipblasComplex *const x[], int incx, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZtpsvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex *const AP[], hipblasDoubleComplex *const x[], int incx, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasStpsvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float *AP, hipblasStride strideA, float *x, int incx, hipblasStride stridex, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDtpsvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double *AP, hipblasStride strideA, double *x, int incx, hipblasStride stridex, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCtpsvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex *AP, hipblasStride strideA, hipblasComplex *x, int incx, hipblasStride stridex, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZtpsvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex *AP, hipblasStride strideA, hipblasDoubleComplex *x, int incx, hipblasStride stridex, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasStrmv (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float *AP, int lda, float *x, int incx) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDtrmv (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double *AP, int lda, double *x, int incx) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCtrmv (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex *AP, int lda, hipblasComplex *x, int incx) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZtrmv (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex *AP, int lda, hipblasDoubleComplex *x, int incx) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasStrmvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float *const AP[], int lda, float *const x[], int incx, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDtrmvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double *const AP[], int lda, double *const x[], int incx, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCtrmvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex *const AP[], int lda, hipblasComplex *const x[], int incx, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZtrmvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex *const AP[], int lda, hipblasDoubleComplex *const x[], int incx, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasStrmvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float *AP, int lda, hipblasStride strideA, float *x, int incx, hipblasStride stridex, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDtrmvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double *AP, int lda, hipblasStride strideA, double *x, int incx, hipblasStride stridex, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCtrmvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex *AP, int lda, hipblasStride strideA, hipblasComplex *x, int incx, hipblasStride stridex, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZtrmvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex *AP, int lda, hipblasStride strideA, hipblasDoubleComplex *x, int incx, hipblasStride stridex, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasStrsv (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float *AP, int lda, float *x, int incx) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDtrsv (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double *AP, int lda, double *x, int incx) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCtrsv (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex *AP, int lda, hipblasComplex *x, int incx) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZtrsv (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex *AP, int lda, hipblasDoubleComplex *x, int incx) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasStrsvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float *const AP[], int lda, float *const x[], int incx, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDtrsvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double *const AP[], int lda, double *const x[], int incx, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCtrsvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex *const AP[], int lda, hipblasComplex *const x[], int incx, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZtrsvBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex *const AP[], int lda, hipblasDoubleComplex *const x[], int incx, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasStrsvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const float *AP, int lda, hipblasStride strideA, float *x, int incx, hipblasStride stridex, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasDtrsvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const double *AP, int lda, hipblasStride strideA, double *x, int incx, hipblasStride stridex, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasCtrsvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasComplex *AP, int lda, hipblasStride strideA, hipblasComplex *x, int incx, hipblasStride stridex, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasZtrsvStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, const hipblasDoubleComplex *AP, int lda, hipblasStride strideA, hipblasDoubleComplex *x, int incx, hipblasStride stridex, int batchCount) |
| BLAS Level 2 API. More... | |
| hipblasStatus_t | hipblasHgemm (hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasHalf *alpha, const hipblasHalf *AP, int lda, const hipblasHalf *BP, int ldb, const hipblasHalf *beta, hipblasHalf *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasSgemm (hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const float *alpha, const float *AP, int lda, const float *BP, int ldb, const float *beta, float *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasDgemm (hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const double *alpha, const double *AP, int lda, const double *BP, int ldb, const double *beta, double *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCgemm (hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasComplex *alpha, const hipblasComplex *AP, int lda, const hipblasComplex *BP, int ldb, const hipblasComplex *beta, hipblasComplex *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZgemm (hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *AP, int lda, const hipblasDoubleComplex *BP, int ldb, const hipblasDoubleComplex *beta, hipblasDoubleComplex *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasHgemmBatched (hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasHalf *alpha, const hipblasHalf *const AP[], int lda, const hipblasHalf *const BP[], int ldb, const hipblasHalf *beta, hipblasHalf *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasSgemmBatched (hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const float *alpha, const float *const AP[], int lda, const float *const BP[], int ldb, const float *beta, float *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasDgemmBatched (hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const double *alpha, const double *const AP[], int lda, const double *const BP[], int ldb, const double *beta, double *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCgemmBatched (hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasComplex *alpha, const hipblasComplex *const AP[], int lda, const hipblasComplex *const BP[], int ldb, const hipblasComplex *beta, hipblasComplex *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZgemmBatched (hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *const AP[], int lda, const hipblasDoubleComplex *const BP[], int ldb, const hipblasDoubleComplex *beta, hipblasDoubleComplex *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasHgemmStridedBatched (hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasHalf *alpha, const hipblasHalf *AP, int lda, long long strideA, const hipblasHalf *BP, int ldb, long long strideB, const hipblasHalf *beta, hipblasHalf *CP, int ldc, long long strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasSgemmStridedBatched (hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const float *alpha, const float *AP, int lda, long long strideA, const float *BP, int ldb, long long strideB, const float *beta, float *CP, int ldc, long long strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasDgemmStridedBatched (hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const double *alpha, const double *AP, int lda, long long strideA, const double *BP, int ldb, long long strideB, const double *beta, double *CP, int ldc, long long strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCgemmStridedBatched (hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasComplex *alpha, const hipblasComplex *AP, int lda, long long strideA, const hipblasComplex *BP, int ldb, long long strideB, const hipblasComplex *beta, hipblasComplex *CP, int ldc, long long strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZgemmStridedBatched (hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, int k, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *AP, int lda, long long strideA, const hipblasDoubleComplex *BP, int ldb, long long strideB, const hipblasDoubleComplex *beta, hipblasDoubleComplex *CP, int ldc, long long strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCherk (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float *alpha, const hipblasComplex *AP, int lda, const float *beta, hipblasComplex *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZherk (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double *alpha, const hipblasDoubleComplex *AP, int lda, const double *beta, hipblasDoubleComplex *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCherkBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float *alpha, const hipblasComplex *const AP[], int lda, const float *beta, hipblasComplex *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZherkBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double *alpha, const hipblasDoubleComplex *const AP[], int lda, const double *beta, hipblasDoubleComplex *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCherkStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float *alpha, const hipblasComplex *AP, int lda, hipblasStride strideA, const float *beta, hipblasComplex *CP, int ldc, hipblasStride strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZherkStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double *alpha, const hipblasDoubleComplex *AP, int lda, hipblasStride strideA, const double *beta, hipblasDoubleComplex *CP, int ldc, hipblasStride strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCherkx (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex *alpha, const hipblasComplex *AP, int lda, const hipblasComplex *BP, int ldb, const float *beta, hipblasComplex *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZherkx (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *AP, int lda, const hipblasDoubleComplex *BP, int ldb, const double *beta, hipblasDoubleComplex *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCherkxBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex *alpha, const hipblasComplex *const AP[], int lda, const hipblasComplex *const BP[], int ldb, const float *beta, hipblasComplex *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZherkxBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *const AP[], int lda, const hipblasDoubleComplex *const BP[], int ldb, const double *beta, hipblasDoubleComplex *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCherkxStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex *alpha, const hipblasComplex *AP, int lda, hipblasStride strideA, const hipblasComplex *BP, int ldb, hipblasStride strideB, const float *beta, hipblasComplex *CP, int ldc, hipblasStride strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZherkxStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *AP, int lda, hipblasStride strideA, const hipblasDoubleComplex *BP, int ldb, hipblasStride strideB, const double *beta, hipblasDoubleComplex *CP, int ldc, hipblasStride strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCher2k (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex *alpha, const hipblasComplex *AP, int lda, const hipblasComplex *BP, int ldb, const float *beta, hipblasComplex *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZher2k (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *AP, int lda, const hipblasDoubleComplex *BP, int ldb, const double *beta, hipblasDoubleComplex *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCher2kBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex *alpha, const hipblasComplex *const AP[], int lda, const hipblasComplex *const BP[], int ldb, const float *beta, hipblasComplex *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZher2kBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *const AP[], int lda, const hipblasDoubleComplex *const BP[], int ldb, const double *beta, hipblasDoubleComplex *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCher2kStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex *alpha, const hipblasComplex *AP, int lda, hipblasStride strideA, const hipblasComplex *BP, int ldb, hipblasStride strideB, const float *beta, hipblasComplex *CP, int ldc, hipblasStride strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZher2kStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *AP, int lda, hipblasStride strideA, const hipblasDoubleComplex *BP, int ldb, hipblasStride strideB, const double *beta, hipblasDoubleComplex *CP, int ldc, hipblasStride strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasSsymm (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const float *alpha, const float *AP, int lda, const float *BP, int ldb, const float *beta, float *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasDsymm (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const double *alpha, const double *AP, int lda, const double *BP, int ldb, const double *beta, double *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCsymm (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasComplex *alpha, const hipblasComplex *AP, int lda, const hipblasComplex *BP, int ldb, const hipblasComplex *beta, hipblasComplex *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZsymm (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *AP, int lda, const hipblasDoubleComplex *BP, int ldb, const hipblasDoubleComplex *beta, hipblasDoubleComplex *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasSsymmBatched (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const float *alpha, const float *const AP[], int lda, const float *const BP[], int ldb, const float *beta, float *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasDsymmBatched (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const double *alpha, const double *const AP[], int lda, const double *const BP[], int ldb, const double *beta, double *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCsymmBatched (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasComplex *alpha, const hipblasComplex *const AP[], int lda, const hipblasComplex *const BP[], int ldb, const hipblasComplex *beta, hipblasComplex *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZsymmBatched (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *const AP[], int lda, const hipblasDoubleComplex *const BP[], int ldb, const hipblasDoubleComplex *beta, hipblasDoubleComplex *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasSsymmStridedBatched (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const float *alpha, const float *AP, int lda, hipblasStride strideA, const float *BP, int ldb, hipblasStride strideB, const float *beta, float *CP, int ldc, hipblasStride strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasDsymmStridedBatched (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const double *alpha, const double *AP, int lda, hipblasStride strideA, const double *BP, int ldb, hipblasStride strideB, const double *beta, double *CP, int ldc, hipblasStride strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCsymmStridedBatched (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasComplex *alpha, const hipblasComplex *AP, int lda, hipblasStride strideA, const hipblasComplex *BP, int ldb, hipblasStride strideB, const hipblasComplex *beta, hipblasComplex *CP, int ldc, hipblasStride strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZsymmStridedBatched (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int m, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *AP, int lda, hipblasStride strideA, const hipblasDoubleComplex *BP, int ldb, hipblasStride strideB, const hipblasDoubleComplex *beta, hipblasDoubleComplex *CP, int ldc, hipblasStride strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasSsyrk (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float *alpha, const float *AP, int lda, const float *beta, float *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasDsyrk (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double *alpha, const double *AP, int lda, const double *beta, double *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCsyrk (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex *alpha, const hipblasComplex *AP, int lda, const hipblasComplex *beta, hipblasComplex *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZsyrk (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *AP, int lda, const hipblasDoubleComplex *beta, hipblasDoubleComplex *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasSsyrkBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float *alpha, const float *const AP[], int lda, const float *beta, float *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasDsyrkBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double *alpha, const double *const AP[], int lda, const double *beta, double *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCsyrkBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex *alpha, const hipblasComplex *const AP[], int lda, const hipblasComplex *beta, hipblasComplex *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZsyrkBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *const AP[], int lda, const hipblasDoubleComplex *beta, hipblasDoubleComplex *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasSsyrkStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float *alpha, const float *AP, int lda, hipblasStride strideA, const float *beta, float *CP, int ldc, hipblasStride strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasDsyrkStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double *alpha, const double *AP, int lda, hipblasStride strideA, const double *beta, double *CP, int ldc, hipblasStride strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCsyrkStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex *alpha, const hipblasComplex *AP, int lda, hipblasStride strideA, const hipblasComplex *beta, hipblasComplex *CP, int ldc, hipblasStride strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZsyrkStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *AP, int lda, hipblasStride strideA, const hipblasDoubleComplex *beta, hipblasDoubleComplex *CP, int ldc, hipblasStride strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasSsyr2k (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float *alpha, const float *AP, int lda, const float *BP, int ldb, const float *beta, float *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasDsyr2k (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double *alpha, const double *AP, int lda, const double *BP, int ldb, const double *beta, double *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCsyr2k (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex *alpha, const hipblasComplex *AP, int lda, const hipblasComplex *BP, int ldb, const hipblasComplex *beta, hipblasComplex *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZsyr2k (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *AP, int lda, const hipblasDoubleComplex *BP, int ldb, const hipblasDoubleComplex *beta, hipblasDoubleComplex *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasSsyr2kBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float *alpha, const float *const AP[], int lda, const float *const BP[], int ldb, const float *beta, float *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasDsyr2kBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double *alpha, const double *const AP[], int lda, const double *const BP[], int ldb, const double *beta, double *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCsyr2kBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex *alpha, const hipblasComplex *const AP[], int lda, const hipblasComplex *const BP[], int ldb, const hipblasComplex *beta, hipblasComplex *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZsyr2kBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *const AP[], int lda, const hipblasDoubleComplex *const BP[], int ldb, const hipblasDoubleComplex *beta, hipblasDoubleComplex *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasSsyr2kStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float *alpha, const float *AP, int lda, hipblasStride strideA, const float *BP, int ldb, hipblasStride strideB, const float *beta, float *CP, int ldc, hipblasStride strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasDsyr2kStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double *alpha, const double *AP, int lda, hipblasStride strideA, const double *BP, int ldb, hipblasStride strideB, const double *beta, double *CP, int ldc, hipblasStride strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCsyr2kStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex *alpha, const hipblasComplex *AP, int lda, hipblasStride strideA, const hipblasComplex *BP, int ldb, hipblasStride strideB, const hipblasComplex *beta, hipblasComplex *CP, int ldc, hipblasStride strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZsyr2kStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *AP, int lda, hipblasStride strideA, const hipblasDoubleComplex *BP, int ldb, hipblasStride strideB, const hipblasDoubleComplex *beta, hipblasDoubleComplex *CP, int ldc, hipblasStride strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasSsyrkx (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float *alpha, const float *AP, int lda, const float *BP, int ldb, const float *beta, float *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasDsyrkx (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double *alpha, const double *AP, int lda, const double *BP, int ldb, const double *beta, double *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCsyrkx (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex *alpha, const hipblasComplex *AP, int lda, const hipblasComplex *BP, int ldb, const hipblasComplex *beta, hipblasComplex *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZsyrkx (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *AP, int lda, const hipblasDoubleComplex *BP, int ldb, const hipblasDoubleComplex *beta, hipblasDoubleComplex *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasSsyrkxBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float *alpha, const float *const AP[], int lda, const float *const BP[], int ldb, const float *beta, float *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasDsyrkxBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double *alpha, const double *const AP[], int lda, const double *const BP[], int ldb, const double *beta, double *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCsyrkxBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex *alpha, const hipblasComplex *const AP[], int lda, const hipblasComplex *const BP[], int ldb, const hipblasComplex *beta, hipblasComplex *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZsyrkxBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *const AP[], int lda, const hipblasDoubleComplex *const BP[], int ldb, const hipblasDoubleComplex *beta, hipblasDoubleComplex *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasSsyrkxStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const float *alpha, const float *AP, int lda, hipblasStride strideA, const float *BP, int ldb, hipblasStride strideB, const float *beta, float *CP, int ldc, hipblasStride strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasDsyrkxStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const double *alpha, const double *AP, int lda, hipblasStride strideA, const double *BP, int ldb, hipblasStride strideB, const double *beta, double *CP, int ldc, hipblasStride strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCsyrkxStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasComplex *alpha, const hipblasComplex *AP, int lda, hipblasStride strideA, const hipblasComplex *BP, int ldb, hipblasStride strideB, const hipblasComplex *beta, hipblasComplex *CP, int ldc, hipblasStride strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZsyrkxStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasOperation_t transA, int n, int k, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *AP, int lda, hipblasStride strideA, const hipblasDoubleComplex *BP, int ldb, hipblasStride strideB, const hipblasDoubleComplex *beta, hipblasDoubleComplex *CP, int ldc, hipblasStride strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasSgeam (hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const float *alpha, const float *AP, int lda, const float *beta, const float *BP, int ldb, float *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasDgeam (hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const double *alpha, const double *AP, int lda, const double *beta, const double *BP, int ldb, double *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCgeam (hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const hipblasComplex *alpha, const hipblasComplex *AP, int lda, const hipblasComplex *beta, const hipblasComplex *BP, int ldb, hipblasComplex *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZgeam (hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *AP, int lda, const hipblasDoubleComplex *beta, const hipblasDoubleComplex *BP, int ldb, hipblasDoubleComplex *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasSgeamBatched (hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const float *alpha, const float *const AP[], int lda, const float *beta, const float *const BP[], int ldb, float *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasDgeamBatched (hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const double *alpha, const double *const AP[], int lda, const double *beta, const double *const BP[], int ldb, double *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCgeamBatched (hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const hipblasComplex *alpha, const hipblasComplex *const AP[], int lda, const hipblasComplex *beta, const hipblasComplex *const BP[], int ldb, hipblasComplex *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZgeamBatched (hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *const AP[], int lda, const hipblasDoubleComplex *beta, const hipblasDoubleComplex *const BP[], int ldb, hipblasDoubleComplex *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasSgeamStridedBatched (hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const float *alpha, const float *AP, int lda, hipblasStride strideA, const float *beta, const float *BP, int ldb, hipblasStride strideB, float *CP, int ldc, hipblasStride strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasDgeamStridedBatched (hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const double *alpha, const double *AP, int lda, hipblasStride strideA, const double *beta, const double *BP, int ldb, hipblasStride strideB, double *CP, int ldc, hipblasStride strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCgeamStridedBatched (hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const hipblasComplex *alpha, const hipblasComplex *AP, int lda, hipblasStride strideA, const hipblasComplex *beta, const hipblasComplex *BP, int ldb, hipblasStride strideB, hipblasComplex *CP, int ldc, hipblasStride strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZgeamStridedBatched (hipblasHandle_t handle, hipblasOperation_t transA, hipblasOperation_t transB, int m, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *AP, int lda, hipblasStride strideA, const hipblasDoubleComplex *beta, const hipblasDoubleComplex *BP, int ldb, hipblasStride strideB, hipblasDoubleComplex *CP, int ldc, hipblasStride strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasChemm (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int n, int k, const hipblasComplex *alpha, const hipblasComplex *AP, int lda, const hipblasComplex *BP, int ldb, const hipblasComplex *beta, hipblasComplex *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZhemm (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int n, int k, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *AP, int lda, const hipblasDoubleComplex *BP, int ldb, const hipblasDoubleComplex *beta, hipblasDoubleComplex *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasChemmBatched (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int n, int k, const hipblasComplex *alpha, const hipblasComplex *const AP[], int lda, const hipblasComplex *const BP[], int ldb, const hipblasComplex *beta, hipblasComplex *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZhemmBatched (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int n, int k, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *const AP[], int lda, const hipblasDoubleComplex *const BP[], int ldb, const hipblasDoubleComplex *beta, hipblasDoubleComplex *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasChemmStridedBatched (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int n, int k, const hipblasComplex *alpha, const hipblasComplex *AP, int lda, hipblasStride strideA, const hipblasComplex *BP, int ldb, hipblasStride strideB, const hipblasComplex *beta, hipblasComplex *CP, int ldc, hipblasStride strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZhemmStridedBatched (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, int n, int k, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *AP, int lda, hipblasStride strideA, const hipblasDoubleComplex *BP, int ldb, hipblasStride strideB, const hipblasDoubleComplex *beta, hipblasDoubleComplex *CP, int ldc, hipblasStride strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasStrmm (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float *alpha, const float *AP, int lda, float *BP, int ldb) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasDtrmm (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double *alpha, const double *AP, int lda, double *BP, int ldb) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCtrmm (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex *alpha, const hipblasComplex *AP, int lda, hipblasComplex *BP, int ldb) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZtrmm (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *AP, int lda, hipblasDoubleComplex *BP, int ldb) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasStrmmBatched (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float *alpha, const float *const AP[], int lda, float *const BP[], int ldb, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasDtrmmBatched (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double *alpha, const double *const AP[], int lda, double *const BP[], int ldb, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCtrmmBatched (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex *alpha, const hipblasComplex *const AP[], int lda, hipblasComplex *const BP[], int ldb, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZtrmmBatched (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *const AP[], int lda, hipblasDoubleComplex *const BP[], int ldb, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasStrmmStridedBatched (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float *alpha, const float *AP, int lda, hipblasStride strideA, float *BP, int ldb, hipblasStride strideB, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasDtrmmStridedBatched (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double *alpha, const double *AP, int lda, hipblasStride strideA, double *BP, int ldb, hipblasStride strideB, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCtrmmStridedBatched (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex *alpha, const hipblasComplex *AP, int lda, hipblasStride strideA, hipblasComplex *BP, int ldb, hipblasStride strideB, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZtrmmStridedBatched (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *AP, int lda, hipblasStride strideA, hipblasDoubleComplex *BP, int ldb, hipblasStride strideB, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasStrsm (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float *alpha, const float *AP, int lda, float *BP, int ldb) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasDtrsm (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double *alpha, const double *AP, int lda, double *BP, int ldb) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCtrsm (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex *alpha, const hipblasComplex *AP, int lda, hipblasComplex *BP, int ldb) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZtrsm (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *AP, int lda, hipblasDoubleComplex *BP, int ldb) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasStrsmBatched (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float *alpha, const float *const AP[], int lda, float *const BP[], int ldb, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasDtrsmBatched (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double *alpha, const double *const AP[], int lda, double *const BP[], int ldb, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCtrsmBatched (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex *alpha, const hipblasComplex *const AP[], int lda, hipblasComplex *const BP[], int ldb, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZtrsmBatched (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *const AP[], int lda, hipblasDoubleComplex *const BP[], int ldb, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasStrsmStridedBatched (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const float *alpha, const float *AP, int lda, hipblasStride strideA, float *BP, int ldb, hipblasStride strideB, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasDtrsmStridedBatched (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const double *alpha, const double *AP, int lda, hipblasStride strideA, double *BP, int ldb, hipblasStride strideB, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCtrsmStridedBatched (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasComplex *alpha, const hipblasComplex *AP, int lda, hipblasStride strideA, hipblasComplex *BP, int ldb, hipblasStride strideB, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZtrsmStridedBatched (hipblasHandle_t handle, hipblasSideMode_t side, hipblasFillMode_t uplo, hipblasOperation_t transA, hipblasDiagType_t diag, int m, int n, const hipblasDoubleComplex *alpha, const hipblasDoubleComplex *AP, int lda, hipblasStride strideA, hipblasDoubleComplex *BP, int ldb, hipblasStride strideB, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasStrtri (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, const float *AP, int lda, float *invA, int ldinvA) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasDtrtri (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, const double *AP, int lda, double *invA, int ldinvA) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCtrtri (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, const hipblasComplex *AP, int lda, hipblasComplex *invA, int ldinvA) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZtrtri (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, const hipblasDoubleComplex *AP, int lda, hipblasDoubleComplex *invA, int ldinvA) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasStrtriBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, const float *const AP[], int lda, float *invA[], int ldinvA, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasDtrtriBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, const double *const AP[], int lda, double *invA[], int ldinvA, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCtrtriBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, const hipblasComplex *const AP[], int lda, hipblasComplex *invA[], int ldinvA, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZtrtriBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, const hipblasDoubleComplex *const AP[], int lda, hipblasDoubleComplex *invA[], int ldinvA, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasStrtriStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, const float *AP, int lda, hipblasStride strideA, float *invA, int ldinvA, hipblasStride stride_invA, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasDtrtriStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, const double *AP, int lda, hipblasStride strideA, double *invA, int ldinvA, hipblasStride stride_invA, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCtrtriStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, const hipblasComplex *AP, int lda, hipblasStride strideA, hipblasComplex *invA, int ldinvA, hipblasStride stride_invA, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZtrtriStridedBatched (hipblasHandle_t handle, hipblasFillMode_t uplo, hipblasDiagType_t diag, int n, const hipblasDoubleComplex *AP, int lda, hipblasStride strideA, hipblasDoubleComplex *invA, int ldinvA, hipblasStride stride_invA, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasSdgmm (hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const float *AP, int lda, const float *x, int incx, float *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasDdgmm (hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const double *AP, int lda, const double *x, int incx, double *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCdgmm (hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const hipblasComplex *AP, int lda, const hipblasComplex *x, int incx, hipblasComplex *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZdgmm (hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const hipblasDoubleComplex *AP, int lda, const hipblasDoubleComplex *x, int incx, hipblasDoubleComplex *CP, int ldc) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasSdgmmBatched (hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const float *const AP[], int lda, const float *const x[], int incx, float *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasDdgmmBatched (hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const double *const AP[], int lda, const double *const x[], int incx, double *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCdgmmBatched (hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const hipblasComplex *const AP[], int lda, const hipblasComplex *const x[], int incx, hipblasComplex *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZdgmmBatched (hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const hipblasDoubleComplex *const AP[], int lda, const hipblasDoubleComplex *const x[], int incx, hipblasDoubleComplex *const CP[], int ldc, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasSdgmmStridedBatched (hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const float *AP, int lda, hipblasStride strideA, const float *x, int incx, hipblasStride stridex, float *CP, int ldc, hipblasStride strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasDdgmmStridedBatched (hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const double *AP, int lda, hipblasStride strideA, const double *x, int incx, hipblasStride stridex, double *CP, int ldc, hipblasStride strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasCdgmmStridedBatched (hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const hipblasComplex *AP, int lda, hipblasStride strideA, const hipblasComplex *x, int incx, hipblasStride stridex, hipblasComplex *CP, int ldc, hipblasStride strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasZdgmmStridedBatched (hipblasHandle_t handle, hipblasSideMode_t side, int m, int n, const hipblasDoubleComplex *AP, int lda, hipblasStride strideA, const hipblasDoubleComplex *x, int incx, hipblasStride stridex, hipblasDoubleComplex *CP, int ldc, hipblasStride strideC, int batchCount) |
| BLAS Level 3 API. More... | |
| hipblasStatus_t | hipblasSgetrf (hipblasHandle_t handle, const int n, float *A, const int lda, int *ipiv, int *info) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasDgetrf (hipblasHandle_t handle, const int n, double *A, const int lda, int *ipiv, int *info) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasCgetrf (hipblasHandle_t handle, const int n, hipblasComplex *A, const int lda, int *ipiv, int *info) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasZgetrf (hipblasHandle_t handle, const int n, hipblasDoubleComplex *A, const int lda, int *ipiv, int *info) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasSgetrfBatched (hipblasHandle_t handle, const int n, float *const A[], const int lda, int *ipiv, int *info, const int batchCount) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasDgetrfBatched (hipblasHandle_t handle, const int n, double *const A[], const int lda, int *ipiv, int *info, const int batchCount) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasCgetrfBatched (hipblasHandle_t handle, const int n, hipblasComplex *const A[], const int lda, int *ipiv, int *info, const int batchCount) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasZgetrfBatched (hipblasHandle_t handle, const int n, hipblasDoubleComplex *const A[], const int lda, int *ipiv, int *info, const int batchCount) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasSgetrfStridedBatched (hipblasHandle_t handle, const int n, float *A, const int lda, const hipblasStride strideA, int *ipiv, const hipblasStride strideP, int *info, const int batchCount) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasDgetrfStridedBatched (hipblasHandle_t handle, const int n, double *A, const int lda, const hipblasStride strideA, int *ipiv, const hipblasStride strideP, int *info, const int batchCount) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasCgetrfStridedBatched (hipblasHandle_t handle, const int n, hipblasComplex *A, const int lda, const hipblasStride strideA, int *ipiv, const hipblasStride strideP, int *info, const int batchCount) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasZgetrfStridedBatched (hipblasHandle_t handle, const int n, hipblasDoubleComplex *A, const int lda, const hipblasStride strideA, int *ipiv, const hipblasStride strideP, int *info, const int batchCount) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasSgetrs (hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, float *A, const int lda, const int *ipiv, float *B, const int ldb, int *info) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasDgetrs (hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, double *A, const int lda, const int *ipiv, double *B, const int ldb, int *info) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasCgetrs (hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasComplex *A, const int lda, const int *ipiv, hipblasComplex *B, const int ldb, int *info) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasZgetrs (hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasDoubleComplex *A, const int lda, const int *ipiv, hipblasDoubleComplex *B, const int ldb, int *info) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasSgetrsBatched (hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, float *const A[], const int lda, const int *ipiv, float *const B[], const int ldb, int *info, const int batchCount) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasDgetrsBatched (hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, double *const A[], const int lda, const int *ipiv, double *const B[], const int ldb, int *info, const int batchCount) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasCgetrsBatched (hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasComplex *const A[], const int lda, const int *ipiv, hipblasComplex *const B[], const int ldb, int *info, const int batchCount) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasZgetrsBatched (hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasDoubleComplex *const A[], const int lda, const int *ipiv, hipblasDoubleComplex *const B[], const int ldb, int *info, const int batchCount) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasSgetrsStridedBatched (hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, float *A, const int lda, const hipblasStride strideA, const int *ipiv, const hipblasStride strideP, float *B, const int ldb, const hipblasStride strideB, int *info, const int batchCount) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasDgetrsStridedBatched (hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, double *A, const int lda, const hipblasStride strideA, const int *ipiv, const hipblasStride strideP, double *B, const int ldb, const hipblasStride strideB, int *info, const int batchCount) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasCgetrsStridedBatched (hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasComplex *A, const int lda, const hipblasStride strideA, const int *ipiv, const hipblasStride strideP, hipblasComplex *B, const int ldb, const hipblasStride strideB, int *info, const int batchCount) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasZgetrsStridedBatched (hipblasHandle_t handle, const hipblasOperation_t trans, const int n, const int nrhs, hipblasDoubleComplex *A, const int lda, const hipblasStride strideA, const int *ipiv, const hipblasStride strideP, hipblasDoubleComplex *B, const int ldb, const hipblasStride strideB, int *info, const int batchCount) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasSgetriBatched (hipblasHandle_t handle, const int n, float *const A[], const int lda, int *ipiv, float *const C[], const int ldc, int *info, const int batchCount) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasDgetriBatched (hipblasHandle_t handle, const int n, double *const A[], const int lda, int *ipiv, double *const C[], const int ldc, int *info, const int batchCount) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasCgetriBatched (hipblasHandle_t handle, const int n, hipblasComplex *const A[], const int lda, int *ipiv, hipblasComplex *const C[], const int ldc, int *info, const int batchCount) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasZgetriBatched (hipblasHandle_t handle, const int n, hipblasDoubleComplex *const A[], const int lda, int *ipiv, hipblasDoubleComplex *const C[], const int ldc, int *info, const int batchCount) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasSgels (hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, float *A, const int lda, float *B, const int ldb, int *info, int *deviceInfo) |
| GELS solves an overdetermined (or underdetermined) linear system defined by an m-by-n matrix A, and a corresponding matrix B, using the QR factorization computed by GEQRF (or the LQ factorization computed by "GELQF"). More... | |
| hipblasStatus_t | hipblasDgels (hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, double *A, const int lda, double *B, const int ldb, int *info, int *deviceInfo) |
| GELS solves an overdetermined (or underdetermined) linear system defined by an m-by-n matrix A, and a corresponding matrix B, using the QR factorization computed by GEQRF (or the LQ factorization computed by "GELQF"). More... | |
| hipblasStatus_t | hipblasCgels (hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, hipblasComplex *A, const int lda, hipblasComplex *B, const int ldb, int *info, int *deviceInfo) |
| GELS solves an overdetermined (or underdetermined) linear system defined by an m-by-n matrix A, and a corresponding matrix B, using the QR factorization computed by GEQRF (or the LQ factorization computed by "GELQF"). More... | |
| hipblasStatus_t | hipblasZgels (hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, hipblasDoubleComplex *A, const int lda, hipblasDoubleComplex *B, const int ldb, int *info, int *deviceInfo) |
| GELS solves an overdetermined (or underdetermined) linear system defined by an m-by-n matrix A, and a corresponding matrix B, using the QR factorization computed by GEQRF (or the LQ factorization computed by "GELQF"). More... | |
| hipblasStatus_t | hipblasSgelsBatched (hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, float *const A[], const int lda, float *const B[], const int ldb, int *info, int *deviceInfo, const int batchCount) |
| gelsBatched solves a batch of overdetermined (or underdetermined) linear systems defined by a set of m-by-n matrices \(A_j\), and corresponding matrices \(B_j\), using the QR factorizations computed by "GEQRF_BATCHED" (or the LQ factorizations computed by "GELQF_BATCHED"). More... | |
| hipblasStatus_t | hipblasDgelsBatched (hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, double *const A[], const int lda, double *const B[], const int ldb, int *info, int *deviceInfo, const int batchCount) |
| gelsBatched solves a batch of overdetermined (or underdetermined) linear systems defined by a set of m-by-n matrices \(A_j\), and corresponding matrices \(B_j\), using the QR factorizations computed by "GEQRF_BATCHED" (or the LQ factorizations computed by "GELQF_BATCHED"). More... | |
| hipblasStatus_t | hipblasCgelsBatched (hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, hipblasComplex *const A[], const int lda, hipblasComplex *const B[], const int ldb, int *info, int *deviceInfo, const int batchCount) |
| gelsBatched solves a batch of overdetermined (or underdetermined) linear systems defined by a set of m-by-n matrices \(A_j\), and corresponding matrices \(B_j\), using the QR factorizations computed by "GEQRF_BATCHED" (or the LQ factorizations computed by "GELQF_BATCHED"). More... | |
| hipblasStatus_t | hipblasZgelsBatched (hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, hipblasDoubleComplex *const A[], const int lda, hipblasDoubleComplex *const B[], const int ldb, int *info, int *deviceInfo, const int batchCount) |
| gelsBatched solves a batch of overdetermined (or underdetermined) linear systems defined by a set of m-by-n matrices \(A_j\), and corresponding matrices \(B_j\), using the QR factorizations computed by "GEQRF_BATCHED" (or the LQ factorizations computed by "GELQF_BATCHED"). More... | |
| hipblasStatus_t | hipblasSgelsStridedBatched (hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, float *A, const int lda, const hipblasStride strideA, float *B, const int ldb, const hipblasStride strideB, int *info, int *deviceInfo, const int batch_count) |
| gelsStridedBatched solves a batch of overdetermined (or underdetermined) linear systems defined by a set of m-by-n matrices \(A_j\), and corresponding matrices \(B_j\), using the QR factorizations computed by "GEQRF_STRIDED_BATCHED" (or the LQ factorizations computed by "GELQF_STRIDED_BATCHED"). More... | |
| hipblasStatus_t | hipblasDgelsStridedBatched (hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, double *A, const int lda, const hipblasStride strideA, double *B, const int ldb, const hipblasStride strideB, int *info, int *deviceInfo, const int batch_count) |
| gelsStridedBatched solves a batch of overdetermined (or underdetermined) linear systems defined by a set of m-by-n matrices \(A_j\), and corresponding matrices \(B_j\), using the QR factorizations computed by "GEQRF_STRIDED_BATCHED" (or the LQ factorizations computed by "GELQF_STRIDED_BATCHED"). More... | |
| hipblasStatus_t | hipblasCgelsStridedBatched (hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, hipblasComplex *A, const int lda, const hipblasStride strideA, hipblasComplex *B, const int ldb, const hipblasStride strideB, int *info, int *deviceInfo, const int batch_count) |
| gelsStridedBatched solves a batch of overdetermined (or underdetermined) linear systems defined by a set of m-by-n matrices \(A_j\), and corresponding matrices \(B_j\), using the QR factorizations computed by "GEQRF_STRIDED_BATCHED" (or the LQ factorizations computed by "GELQF_STRIDED_BATCHED"). More... | |
| hipblasStatus_t | hipblasZgelsStridedBatched (hipblasHandle_t handle, hipblasOperation_t trans, const int m, const int n, const int nrhs, hipblasDoubleComplex *A, const int lda, const hipblasStride strideA, hipblasDoubleComplex *B, const int ldb, const hipblasStride strideB, int *info, int *deviceInfo, const int batch_count) |
| gelsStridedBatched solves a batch of overdetermined (or underdetermined) linear systems defined by a set of m-by-n matrices \(A_j\), and corresponding matrices \(B_j\), using the QR factorizations computed by "GEQRF_STRIDED_BATCHED" (or the LQ factorizations computed by "GELQF_STRIDED_BATCHED"). More... | |
| hipblasStatus_t | hipblasSgeqrf (hipblasHandle_t handle, const int m, const int n, float *A, const int lda, float *ipiv, int *info) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasDgeqrf (hipblasHandle_t handle, const int m, const int n, double *A, const int lda, double *ipiv, int *info) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasCgeqrf (hipblasHandle_t handle, const int m, const int n, hipblasComplex *A, const int lda, hipblasComplex *ipiv, int *info) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasZgeqrf (hipblasHandle_t handle, const int m, const int n, hipblasDoubleComplex *A, const int lda, hipblasDoubleComplex *ipiv, int *info) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasSgeqrfBatched (hipblasHandle_t handle, const int m, const int n, float *const A[], const int lda, float *const ipiv[], int *info, const int batchCount) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasDgeqrfBatched (hipblasHandle_t handle, const int m, const int n, double *const A[], const int lda, double *const ipiv[], int *info, const int batchCount) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasCgeqrfBatched (hipblasHandle_t handle, const int m, const int n, hipblasComplex *const A[], const int lda, hipblasComplex *const ipiv[], int *info, const int batchCount) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasZgeqrfBatched (hipblasHandle_t handle, const int m, const int n, hipblasDoubleComplex *const A[], const int lda, hipblasDoubleComplex *const ipiv[], int *info, const int batchCount) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasSgeqrfStridedBatched (hipblasHandle_t handle, const int m, const int n, float *A, const int lda, const hipblasStride strideA, float *ipiv, const hipblasStride strideP, int *info, const int batchCount) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasDgeqrfStridedBatched (hipblasHandle_t handle, const int m, const int n, double *A, const int lda, const hipblasStride strideA, double *ipiv, const hipblasStride strideP, int *info, const int batchCount) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasCgeqrfStridedBatched (hipblasHandle_t handle, const int m, const int n, hipblasComplex *A, const int lda, const hipblasStride strideA, hipblasComplex *ipiv, const hipblasStride strideP, int *info, const int batchCount) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasZgeqrfStridedBatched (hipblasHandle_t handle, const int m, const int n, hipblasDoubleComplex *A, const int lda, const hipblasStride strideA, hipblasDoubleComplex *ipiv, const hipblasStride strideP, int *info, const int batchCount) |
| SOLVER API. More... | |
| hipblasStatus_t | hipblasDotEx (hipblasHandle_t handle, int n, const void *x, hipblasDatatype_t xType, int incx, const void *y, hipblasDatatype_t yType, int incy, void *result, hipblasDatatype_t resultType, hipblasDatatype_t executionType) |
| BLAS EX API. More... | |
| hipblasStatus_t | hipblasDotcEx (hipblasHandle_t handle, int n, const void *x, hipblasDatatype_t xType, int incx, const void *y, hipblasDatatype_t yType, int incy, void *result, hipblasDatatype_t resultType, hipblasDatatype_t executionType) |
| BLAS EX API. More... | |
| hipblasStatus_t | hipblasDotBatchedEx (hipblasHandle_t handle, int n, const void *x, hipblasDatatype_t xType, int incx, const void *y, hipblasDatatype_t yType, int incy, int batchCount, void *result, hipblasDatatype_t resultType, hipblasDatatype_t executionType) |
| BLAS EX API. More... | |
| hipblasStatus_t | hipblasDotcBatchedEx (hipblasHandle_t handle, int n, const void *x, hipblasDatatype_t xType, int incx, const void *y, hipblasDatatype_t yType, int incy, int batchCount, void *result, hipblasDatatype_t resultType, hipblasDatatype_t executionType) |
| BLAS EX API. More... | |
| hipblasStatus_t | hipblasDotStridedBatchedEx (hipblasHandle_t handle, int n, const void *x, hipblasDatatype_t xType, int incx, hipblasStride stridex, const void *y, hipblasDatatype_t yType, int incy, hipblasStride stridey, int batchCount, void *result, hipblasDatatype_t resultType, hipblasDatatype_t executionType) |
| BLAS EX API. More... | |
| hipblasStatus_t | hipblasDotcStridedBatchedEx (hipblasHandle_t handle, int n, const void *x, hipblasDatatype_t xType, int incx, hipblasStride stridex, const void *y, hipblasDatatype_t yType, int incy, hipblasStride stridey, int batchCount, void *result, hipblasDatatype_t resultType, hipblasDatatype_t executionType) |
| BLAS EX API. More... | |
| #define HIPBLAS_CLANG_STATIC |
HIP = Heterogeneous-compute Interface for Portability
Define a extremely thin runtime layer that allows source code to be compiled unmodified through either AMD HCC or NVCC. Key features tend to be in the spirit and terminology of CUDA, but with a portable path to other accelerators as well.
This is the master include file for hipblas, wrapping around rocblas and cublas "version 2"
| typedef uint16_t hipblasHalf |
To specify the datatype to be unsigned short.
| typedef void* hipblasHandle_t |
hipblasHanlde_t is a void pointer, to store the library context (either rocBLAS or cuBLAS)
| typedef int8_t hipblasInt8 |
To specify the datatype to be signed char.
| typedef int64_t hipblasStride |
Stride between matrices or vectors in strided_batched functions.
| enum hipblasAtomicsMode_t |
Indicates if atomics operations are allowed. Not allowing atomic operations may generally improve determinism and repeatability of results at a cost of performance.
| Enumerator | |
|---|---|
| HIPBLAS_ATOMICS_NOT_ALLOWED | Algorithms will refrain from atomics where applicable. |
| HIPBLAS_ATOMICS_ALLOWED | Algorithms will take advantage of atomics where applicable. |
| enum hipblasDatatype_t |
Indicates the precision width of data stored in a blas type.
| enum hipblasDiagType_t |
| enum hipblasFillMode_t |
| enum hipblasGemmAlgo_t |
| enum hipblasOperation_t |
| enum hipblasPointerMode_t |
Indicates if scalar pointers are on host or device. This is used for scalars alpha and beta and for scalar function return values.
| enum hipblasSideMode_t |
Indicates the side matrix A is located relative to matrix B during multiplication.
| enum hipblasStatus_t |
hipblas status codes definition
| hipblasStatus_t hipblasAxpyBatchedEx | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const void * | alpha, | ||
| hipblasDatatype_t | alphaType, | ||
| const void * | x, | ||
| hipblasDatatype_t | xType, | ||
| int | incx, | ||
| void * | y, | ||
| hipblasDatatype_t | yType, | ||
| int | incy, | ||
| int | batchCount, | ||
| hipblasDatatype_t | executionType | ||
| ) |
BLAS EX API.
axpyBatchedEx computes constant alpha multiplied by vector x, plus vector y over a set of batched vectors.
y := alpha * x + y
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i and y_i. |
| [in] | alpha | device pointer or host pointer to specify the scalar alpha. |
| [in] | alphaType | [hipblasDatatype_t] specifies the datatype of alpha. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | xType | [hipblasDatatype_t] specifies the datatype of each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in,out] | y | device array of device pointers storing each vector y_i. |
| [in] | yType | [hipblasDatatype_t] specifies the datatype of each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | batchCount | [int] number of instances in the batch. |
| [in] | executionType | [hipblasDatatype_t] specifies the datatype of computation. |
| hipblasStatus_t hipblasAxpyEx | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const void * | alpha, | ||
| hipblasDatatype_t | alphaType, | ||
| const void * | x, | ||
| hipblasDatatype_t | xType, | ||
| int | incx, | ||
| void * | y, | ||
| hipblasDatatype_t | yType, | ||
| int | incy, | ||
| hipblasDatatype_t | executionType | ||
| ) |
BLAS EX API.
axpyEx computes constant alpha multiplied by vector x, plus vector y
y := alpha * x + y - Supported types are determined by the backend. See rocBLAS/cuBLAS documentation.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x and y. |
| [in] | alpha | device pointer or host pointer to specify the scalar alpha. |
| [in] | alphaType | [hipblasDatatype_t] specifies the datatype of alpha. |
| [in] | x | device pointer storing vector x. |
| [in] | xType | [hipblasDatatype_t] specifies the datatype of vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in,out] | y | device pointer storing vector y. |
| [in] | yType | [hipblasDatatype_t] specifies the datatype of vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in] | executionType | [hipblasDatatype_t] specifies the datatype of computation. |
| hipblasStatus_t hipblasAxpyStridedBatchedEx | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const void * | alpha, | ||
| hipblasDatatype_t | alphaType, | ||
| const void * | x, | ||
| hipblasDatatype_t | xType, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| void * | y, | ||
| hipblasDatatype_t | yType, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount, | ||
| hipblasDatatype_t | executionType | ||
| ) |
BLAS EX API.
axpyStridedBatchedEx computes constant alpha multiplied by vector x, plus vector y over a set of strided batched vectors.
y := alpha * x + y
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i and y_i. |
| [in] | alpha | device pointer or host pointer to specify the scalar alpha. |
| [in] | alphaType | [hipblasDatatype_t] specifies the datatype of alpha. |
| [in] | x | device pointer to the first vector x_1. |
| [in] | xType | [hipblasDatatype_t] specifies the datatype of each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) to the next one (x_i+1). There are no restrictions placed on stridex, however the user should take care to ensure that stridex is of appropriate size, for a typical case this means stridex >= n * incx. |
| [in,out] | y | device pointer to the first vector y_1. |
| [in] | yType | [hipblasDatatype_t] specifies the datatype of each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) to the next one (y_i+1). There are no restrictions placed on stridey, however the user should take care to ensure that stridey is of appropriate size, for a typical case this means stridey >= n * incy. |
| [in] | batchCount | [int] number of instances in the batch. |
| [in] | executionType | [hipblasDatatype_t] specifies the datatype of computation. |
| hipblasStatus_t hipblasBfdot | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasBfloat16 * | x, | ||
| int | incx, | ||
| const hipblasBfloat16 * | y, | ||
| int | incy, | ||
| hipblasBfloat16 * | result | ||
| ) |
BLAS Level 1 API.
dot(u) performs the dot product of vectors x and y
result = x * y;
dotc performs the dot product of the conjugate of complex vector x and complex vector y
result = conjugate (x) * y;
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x and y. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of y. |
| [in] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in,out] | result | device pointer or host pointer to store the dot product. return is 0.0 if n <= 0. |
| hipblasStatus_t hipblasBfdotBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasBfloat16 *const | x[], | ||
| int | incx, | ||
| const hipblasBfloat16 *const | y[], | ||
| int | incy, | ||
| int | batchCount, | ||
| hipblasBfloat16 * | result | ||
| ) |
BLAS Level 1 API.
dotBatched(u) performs a batch of dot products of vectors x and y
result_i = x_i * y_i;
dotcBatched performs a batch of dot products of the conjugate of complex vector x and complex vector y
result_i = conjugate (x_i) * y_i;
where (x_i, y_i) is the i-th instance of the batch. x_i and y_i are vectors, for i = 1, ..., batchCount
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i and y_i. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | batchCount | [int] number of instances in the batch |
| [in,out] | result | device array or host array of batchCount size to store the dot products of each batch. return 0.0 for each element if n <= 0. |
| hipblasStatus_t hipblasBfdotStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasBfloat16 * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const hipblasBfloat16 * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount, | ||
| hipblasBfloat16 * | result | ||
| ) |
BLAS Level 1 API.
dotStridedBatched(u) performs a batch of dot products of vectors x and y
result_i = x_i * y_i;
dotcStridedBatched performs a batch of dot products of the conjugate of complex vector x and complex vector y
result_i = conjugate (x_i) * y_i;
where (x_i, y_i) is the i-th instance of the batch. x_i and y_i are vectors, for i = 1, ..., batchCount
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i and y_i. |
| [in] | x | device pointer to the first vector (x_1) in the batch. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1) |
| [in] | y | device pointer to the first vector (y_1) in the batch. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1) |
| [in] | batchCount | [int] number of instances in the batch |
| [in,out] | result | device array or host array of batchCount size to store the dot products of each batch. return 0.0 for each element if n <= 0. |
| hipblasStatus_t hipblasCaxpy | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasComplex * | y, | ||
| int | incy | ||
| ) |
BLAS Level 1 API.
axpy computes constant alpha multiplied by vector x, plus vector y
y := alpha * x + y
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x and y. |
| [in] | alpha | device pointer or host pointer to specify the scalar alpha. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [out] | y | device pointer storing vector y. |
| [in,out] | incy | [int] specifies the increment for the elements of y. |
| hipblasStatus_t hipblasCaxpyBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex *const | x[], | ||
| int | incx, | ||
| hipblasComplex *const | y[], | ||
| int | incy, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
axpyBatched compute y := alpha * x + y over a set of batched vectors.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x and y. |
| [in] | alpha | specifies the scalar alpha. |
| [in] | x | pointer storing vector x on the GPU. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [out] | y | pointer storing vector y on the GPU. |
| [in,out] | incy | [int] specifies the increment for the elements of y. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasCaxpyStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| hipblasComplex * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
axpyStridedBatched compute y := alpha * x + y over a set of strided batched vectors.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] |
| [in] | alpha | specifies the scalar alpha. |
| [in] | x | pointer storing vector x on the GPU. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | stridex | [hipblasStride] specifies the increment between vectors of x. |
| [out] | y | pointer storing vector y on the GPU. |
| [in,out] | incy | [int] specifies the increment for the elements of y. |
| [in] | stridey | [hipblasStride] specifies the increment between vectors of y. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasCcopy | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasComplex * | y, | ||
| int | incy | ||
| ) |
BLAS Level 1 API.
copy copies each element x[i] into y[i], for i = 1 , ... , n
y := x,
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x to be copied to y. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [out] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| hipblasStatus_t hipblasCcopyBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasComplex *const | x[], | ||
| int | incx, | ||
| hipblasComplex *const | y[], | ||
| int | incy, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
copyBatched copies each element x_i[j] into y_i[j], for j = 1 , ... , n; i = 1 , ... , batchCount
y_i := x_i,
where (x_i, y_i) is the i-th instance of the batch. x_i and y_i are vectors.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i to be copied to y_i. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each vector x_i. |
| [out] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each vector y_i. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasCcopyStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| hipblasComplex * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
copyStridedBatched copies each element x_i[j] into y_i[j], for j = 1 , ... , n; i = 1 , ... , batchCount
y_i := x_i,
where (x_i, y_i) is the i-th instance of the batch. x_i and y_i are vectors.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i to be copied to y_i. |
| [in] | x | device pointer to the first vector (x_1) in the batch. |
| [in] | incx | [int] specifies the increments for the elements of vectors x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stride_x, however the user should take care to ensure that stride_x is of appropriate size, for a typical case this means stride_x >= n * incx. |
| [out] | y | device pointer to the first vector (y_1) in the batch. |
| [in] | incy | [int] specifies the increment for the elements of vectors y_i. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1). There are no restrictions placed on stride_y, however the user should take care to ensure that stride_y is of appropriate size, for a typical case this means stride_y >= n * incy. stridey should be non zero. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasCdgmm | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| int | m, | ||
| int | n, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasComplex * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
dgmm performs one of the matrix-matrix operations
C = A * diag(x) if side == HIPBLAS_SIDE_RIGHT C = diag(x) * A if side == HIPBLAS_SIDE_LEFT
where C and A are m by n dimensional matrices. diag( x ) is a diagonal matrix and x is vector of dimension n if side == HIPBLAS_SIDE_RIGHT and dimension m if side == HIPBLAS_SIDE_LEFT.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] specifies the side of diag(x) |
| [in] | m | [int] matrix dimension m. |
| [in] | n | [int] matrix dimension n. |
| [in] | AP | device pointer storing matrix A. |
| [in] | lda | [int] specifies the leading dimension of A. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment between values of x |
| [in,out] | CP | device pointer storing matrix C. |
| [in] | ldc | [int] specifies the leading dimension of C. |
| hipblasStatus_t hipblasCdgmmBatched | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| int | m, | ||
| int | n, | ||
| const hipblasComplex *const | AP[], | ||
| int | lda, | ||
| const hipblasComplex *const | x[], | ||
| int | incx, | ||
| hipblasComplex *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
dgmmBatched performs one of the batched matrix-matrix operations
C_i = A_i * diag(x_i) for i = 0, 1, ... batchCount-1 if side == HIPBLAS_SIDE_RIGHT C_i = diag(x_i) * A_i for i = 0, 1, ... batchCount-1 if side == HIPBLAS_SIDE_LEFT
where C_i and A_i are m by n dimensional matrices. diag(x_i) is a diagonal matrix and x_i is vector of dimension n if side == HIPBLAS_SIDE_RIGHT and dimension m if side == HIPBLAS_SIDE_LEFT.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] specifies the side of diag(x) |
| [in] | m | [int] matrix dimension m. |
| [in] | n | [int] matrix dimension n. |
| [in] | AP | device array of device pointers storing each matrix A_i on the GPU. Each A_i is of dimension ( lda, n ) |
| [in] | lda | [int] specifies the leading dimension of A_i. |
| [in] | x | device array of device pointers storing each vector x_i on the GPU. Each x_i is of dimension n if side == HIPBLAS_SIDE_RIGHT and dimension m if side == HIPBLAS_SIDE_LEFT |
| [in] | incx | [int] specifies the increment between values of x_i |
| [in,out] | CP | device array of device pointers storing each matrix C_i on the GPU. Each C_i is of dimension ( ldc, n ). |
| [in] | ldc | [int] specifies the leading dimension of C_i. |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasCdgmmStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| int | m, | ||
| int | n, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| hipblasComplex * | CP, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
dgmmStridedBatched performs one of the batched matrix-matrix operations
C_i = A_i * diag(x_i) if side == HIPBLAS_SIDE_RIGHT for i = 0, 1, ... batchCount-1 C_i = diag(x_i) * A_i if side == HIPBLAS_SIDE_LEFT for i = 0, 1, ... batchCount-1
where C_i and A_i are m by n dimensional matrices. diag(x_i) is a diagonal matrix and x_i is vector of dimension n if side == HIPBLAS_SIDE_RIGHT and dimension m if side == HIPBLAS_SIDE_LEFT.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] specifies the side of diag(x) |
| [in] | m | [int] matrix dimension m. |
| [in] | n | [int] matrix dimension n. |
| [in] | AP | device pointer to the first matrix A_0 on the GPU. Each A_i is of dimension ( lda, n ) |
| [in] | lda | [int] specifies the leading dimension of A. |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | x | pointer to the first vector x_0 on the GPU. Each x_i is of dimension n if side == HIPBLAS_SIDE_RIGHT and dimension m if side == HIPBLAS_SIDE_LEFT |
| [in] | incx | [int] specifies the increment between values of x |
| [in] | stridex | [hipblasStride] stride from the start of one vector(x_i) and the next one (x_i+1) |
| [in,out] | CP | device pointer to the first matrix C_0 on the GPU. Each C_i is of dimension ( ldc, n ). |
| [in] | ldc | [int] specifies the leading dimension of C. |
| [in] | strideC | [hipblasStride] stride from the start of one matrix (C_i) and the next one (C_i+1) |
| [in] | batchCount | [int] number of instances i in the batch. |
| hipblasStatus_t hipblasCdotc | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| const hipblasComplex * | y, | ||
| int | incy, | ||
| hipblasComplex * | result | ||
| ) |
BLAS Level 1 API.
dot(u) performs the dot product of vectors x and y
result = x * y;
dotc performs the dot product of the conjugate of complex vector x and complex vector y
result = conjugate (x) * y;
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x and y. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of y. |
| [in] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in,out] | result | device pointer or host pointer to store the dot product. return is 0.0 if n <= 0. |
| hipblasStatus_t hipblasCdotcBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasComplex *const | x[], | ||
| int | incx, | ||
| const hipblasComplex *const | y[], | ||
| int | incy, | ||
| int | batchCount, | ||
| hipblasComplex * | result | ||
| ) |
BLAS Level 1 API.
dotBatched(u) performs a batch of dot products of vectors x and y
result_i = x_i * y_i;
dotcBatched performs a batch of dot products of the conjugate of complex vector x and complex vector y
result_i = conjugate (x_i) * y_i;
where (x_i, y_i) is the i-th instance of the batch. x_i and y_i are vectors, for i = 1, ..., batchCount
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i and y_i. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | batchCount | [int] number of instances in the batch |
| [in,out] | result | device array or host array of batchCount size to store the dot products of each batch. return 0.0 for each element if n <= 0. |
| hipblasStatus_t hipblasCdotcStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const hipblasComplex * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount, | ||
| hipblasComplex * | result | ||
| ) |
BLAS Level 1 API.
dotStridedBatched(u) performs a batch of dot products of vectors x and y
result_i = x_i * y_i;
dotcStridedBatched performs a batch of dot products of the conjugate of complex vector x and complex vector y
result_i = conjugate (x_i) * y_i;
where (x_i, y_i) is the i-th instance of the batch. x_i and y_i are vectors, for i = 1, ..., batchCount
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i and y_i. |
| [in] | x | device pointer to the first vector (x_1) in the batch. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1) |
| [in] | y | device pointer to the first vector (y_1) in the batch. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1) |
| [in] | batchCount | [int] number of instances in the batch |
| [in,out] | result | device array or host array of batchCount size to store the dot products of each batch. return 0.0 for each element if n <= 0. |
| hipblasStatus_t hipblasCdotu | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| const hipblasComplex * | y, | ||
| int | incy, | ||
| hipblasComplex * | result | ||
| ) |
BLAS Level 1 API.
dot(u) performs the dot product of vectors x and y
result = x * y;
dotc performs the dot product of the conjugate of complex vector x and complex vector y
result = conjugate (x) * y;
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x and y. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of y. |
| [in] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in,out] | result | device pointer or host pointer to store the dot product. return is 0.0 if n <= 0. |
| hipblasStatus_t hipblasCdotuBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasComplex *const | x[], | ||
| int | incx, | ||
| const hipblasComplex *const | y[], | ||
| int | incy, | ||
| int | batchCount, | ||
| hipblasComplex * | result | ||
| ) |
BLAS Level 1 API.
dotBatched(u) performs a batch of dot products of vectors x and y
result_i = x_i * y_i;
dotcBatched performs a batch of dot products of the conjugate of complex vector x and complex vector y
result_i = conjugate (x_i) * y_i;
where (x_i, y_i) is the i-th instance of the batch. x_i and y_i are vectors, for i = 1, ..., batchCount
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i and y_i. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | batchCount | [int] number of instances in the batch |
| [in,out] | result | device array or host array of batchCount size to store the dot products of each batch. return 0.0 for each element if n <= 0. |
| hipblasStatus_t hipblasCdotuStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const hipblasComplex * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount, | ||
| hipblasComplex * | result | ||
| ) |
BLAS Level 1 API.
dotStridedBatched(u) performs a batch of dot products of vectors x and y
result_i = x_i * y_i;
dotcStridedBatched performs a batch of dot products of the conjugate of complex vector x and complex vector y
result_i = conjugate (x_i) * y_i;
where (x_i, y_i) is the i-th instance of the batch. x_i and y_i are vectors, for i = 1, ..., batchCount
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i and y_i. |
| [in] | x | device pointer to the first vector (x_1) in the batch. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1) |
| [in] | y | device pointer to the first vector (y_1) in the batch. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1) |
| [in] | batchCount | [int] number of instances in the batch |
| [in,out] | result | device array or host array of batchCount size to store the dot products of each batch. return 0.0 for each element if n <= 0. |
| hipblasStatus_t hipblasCgbmv | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | trans, | ||
| int | m, | ||
| int | n, | ||
| int | kl, | ||
| int | ku, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| const hipblasComplex * | beta, | ||
| hipblasComplex * | y, | ||
| int | incy | ||
| ) |
BLAS Level 2 API.
gbmv performs one of the matrix-vector operations
y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y, or y := alpha*A**H*x + beta*y,
where alpha and beta are scalars, x and y are vectors and A is an m by n banded matrix with kl sub-diagonals and ku super-diagonals.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | trans | [hipblasOperation_t] indicates whether matrix A is tranposed (conjugated) or not |
| [in] | m | [int] number of rows of matrix A |
| [in] | n | [int] number of columns of matrix A |
| [in] | kl | [int] number of sub-diagonals of A |
| [in] | ku | [int] number of super-diagonals of A |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device pointer storing banded matrix A. Leading (kl + ku + 1) by n part of the matrix contains the coefficients of the banded matrix. The leading diagonal resides in row (ku + 1) with the first super-diagonal above on the RHS of row ku. The first sub-diagonal resides below on the LHS of row ku + 2. This propogates up and down across sub/super-diagonals. Ex: (m = n = 7; ku = 2, kl = 2) 1 2 3 0 0 0 0 0 0 3 3 3 3 3 4 1 2 3 0 0 0 0 2 2 2 2 2 2 5 4 1 2 3 0 0 -—> 1 1 1 1 1 1 1 0 5 4 1 2 3 0 4 4 4 4 4 4 0 0 0 5 4 1 2 0 5 5 5 5 5 0 0 0 0 0 5 4 1 2 0 0 0 0 0 0 0 0 0 0 0 5 4 1 0 0 0 0 0 0 0 Note that the empty elements which don't correspond to data will not be referenced. |
| [in] | lda | [int] specifies the leading dimension of A. Must be >= (kl + ku + 1) |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| hipblasStatus_t hipblasCgbmvBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | trans, | ||
| int | m, | ||
| int | n, | ||
| int | kl, | ||
| int | ku, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex *const | AP[], | ||
| int | lda, | ||
| const hipblasComplex *const | x[], | ||
| int | incx, | ||
| const hipblasComplex * | beta, | ||
| hipblasComplex *const | y[], | ||
| int | incy, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
gbmvBatched performs one of the matrix-vector operations
y_i := alpha*A_i*x_i + beta*y_i, or y_i := alpha*A_i**T*x_i + beta*y_i, or y_i := alpha*A_i**H*x_i + beta*y_i,
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha and beta are scalars, x_i and y_i are vectors and A_i is an m by n banded matrix with kl sub-diagonals and ku super-diagonals, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | trans | [hipblasOperation_t] indicates whether matrix A is tranposed (conjugated) or not |
| [in] | m | [int] number of rows of each matrix A_i |
| [in] | n | [int] number of columns of each matrix A_i |
| [in] | kl | [int] number of sub-diagonals of each A_i |
| [in] | ku | [int] number of super-diagonals of each A_i |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device array of device pointers storing each banded matrix A_i. Leading (kl + ku + 1) by n part of the matrix contains the coefficients of the banded matrix. The leading diagonal resides in row (ku + 1) with the first super-diagonal above on the RHS of row ku. The first sub-diagonal resides below on the LHS of row ku + 2. This propogates up and down across sub/super-diagonals. Ex: (m = n = 7; ku = 2, kl = 2) 1 2 3 0 0 0 0 0 0 3 3 3 3 3 4 1 2 3 0 0 0 0 2 2 2 2 2 2 5 4 1 2 3 0 0 -—> 1 1 1 1 1 1 1 0 5 4 1 2 3 0 4 4 4 4 4 4 0 0 0 5 4 1 2 0 5 5 5 5 5 0 0 0 0 0 5 4 1 2 0 0 0 0 0 0 0 0 0 0 0 5 4 1 0 0 0 0 0 0 0 Note that the empty elements which don't correspond to data will not be referenced. |
| [in] | lda | [int] specifies the leading dimension of each A_i. Must be >= (kl + ku + 1) |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | batchCount | [int] specifies the number of instances in the batch. |
| hipblasStatus_t hipblasCgbmvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | trans, | ||
| int | m, | ||
| int | n, | ||
| int | kl, | ||
| int | ku, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const hipblasComplex * | beta, | ||
| hipblasComplex * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
gbmvStridedBatched performs one of the matrix-vector operations
y_i := alpha*A_i*x_i + beta*y_i, or y_i := alpha*A_i**T*x_i + beta*y_i, or y_i := alpha*A_i**H*x_i + beta*y_i,
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha and beta are scalars, x_i and y_i are vectors and A_i is an m by n banded matrix with kl sub-diagonals and ku super-diagonals, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | trans | [hipblasOperation_t] indicates whether matrix A is tranposed (conjugated) or not |
| [in] | m | [int] number of rows of matrix A |
| [in] | n | [int] number of columns of matrix A |
| [in] | kl | [int] number of sub-diagonals of A |
| [in] | ku | [int] number of super-diagonals of A |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device pointer to first banded matrix (A_1). Leading (kl + ku + 1) by n part of the matrix contains the coefficients of the banded matrix. The leading diagonal resides in row (ku + 1) with the first super-diagonal above on the RHS of row ku. The first sub-diagonal resides below on the LHS of row ku + 2. This propogates up and down across sub/super-diagonals. Ex: (m = n = 7; ku = 2, kl = 2) 1 2 3 0 0 0 0 0 0 3 3 3 3 3 4 1 2 3 0 0 0 0 2 2 2 2 2 2 5 4 1 2 3 0 0 -—> 1 1 1 1 1 1 1 0 5 4 1 2 3 0 4 4 4 4 4 4 0 0 0 5 4 1 2 0 5 5 5 5 5 0 0 0 0 0 5 4 1 2 0 0 0 0 0 0 0 0 0 0 0 5 4 1 0 0 0 0 0 0 0 Note that the empty elements which don't correspond to data will not be referenced. |
| [in] | lda | [int] specifies the leading dimension of A. Must be >= (kl + ku + 1) |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | x | device pointer to first vector (x_1). |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1) |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device pointer to first vector (y_1). |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (x_i+1) |
| [in] | batchCount | [int] specifies the number of instances in the batch. |
| hipblasStatus_t hipblasCgeam | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | transA, | ||
| hipblasOperation_t | transB, | ||
| int | m, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| const hipblasComplex * | beta, | ||
| const hipblasComplex * | BP, | ||
| int | ldb, | ||
| hipblasComplex * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
geam performs one of the matrix-matrix operations
C = alpha*op( A ) + beta*op( B ),
where op( X ) is one of
op( X ) = X or op( X ) = X**T or op( X ) = X**H,
alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by n matrix, op( B ) an m by n matrix, and C an m by n matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | transA | [hipblasOperation_t] specifies the form of op( A ) |
| [in] | transB | [hipblasOperation_t] specifies the form of op( B ) |
| [in] | m | [int] matrix dimension m. |
| [in] | n | [int] matrix dimension n. |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. |
| [in] | AP | device pointer storing matrix A. |
| [in] | lda | [int] specifies the leading dimension of A. |
| [in] | beta | device pointer or host pointer specifying the scalar beta. |
| [in] | BP | device pointer storing matrix B. |
| [in] | ldb | [int] specifies the leading dimension of B. |
| [in,out] | CP | device pointer storing matrix C. |
| [in] | ldc | [int] specifies the leading dimension of C. |
| hipblasStatus_t hipblasCgeamBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | transA, | ||
| hipblasOperation_t | transB, | ||
| int | m, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex *const | AP[], | ||
| int | lda, | ||
| const hipblasComplex * | beta, | ||
| const hipblasComplex *const | BP[], | ||
| int | ldb, | ||
| hipblasComplex *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
geamBatched performs one of the batched matrix-matrix operations
C_i = alpha*op( A_i ) + beta*op( B_i ) for i = 0, 1, ... batchCount - 1
where alpha and beta are scalars, and op(A_i), op(B_i) and C_i are m by n matrices and op( X ) is one of
op( X ) = X or op( X ) = X**T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | transA | [hipblasOperation_t] specifies the form of op( A ) |
| [in] | transB | [hipblasOperation_t] specifies the form of op( B ) |
| [in] | m | [int] matrix dimension m. |
| [in] | n | [int] matrix dimension n. |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. |
| [in] | AP | device array of device pointers storing each matrix A_i on the GPU. Each A_i is of dimension ( lda, k ), where k is m when transA == HIPBLAS_OP_N and is n when transA == HIPBLAS_OP_T. |
| [in] | lda | [int] specifies the leading dimension of A. |
| [in] | beta | device pointer or host pointer specifying the scalar beta. |
| [in] | BP | device array of device pointers storing each matrix B_i on the GPU. Each B_i is of dimension ( ldb, k ), where k is m when transB == HIPBLAS_OP_N and is n when transB == HIPBLAS_OP_T. |
| [in] | ldb | [int] specifies the leading dimension of B. |
| [in,out] | CP | device array of device pointers storing each matrix C_i on the GPU. Each C_i is of dimension ( ldc, n ). |
| [in] | ldc | [int] specifies the leading dimension of C. |
| [in] | batchCount | [int] number of instances i in the batch. |
| hipblasStatus_t hipblasCgeamStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | transA, | ||
| hipblasOperation_t | transB, | ||
| int | m, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const hipblasComplex * | beta, | ||
| const hipblasComplex * | BP, | ||
| int | ldb, | ||
| hipblasStride | strideB, | ||
| hipblasComplex * | CP, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
geamStridedBatched performs one of the batched matrix-matrix operations
C_i = alpha*op( A_i ) + beta*op( B_i ) for i = 0, 1, ... batchCount - 1
where alpha and beta are scalars, and op(A_i), op(B_i) and C_i are m by n matrices and op( X ) is one of
op( X ) = X or op( X ) = X**T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | transA | [hipblasOperation_t] specifies the form of op( A ) |
| [in] | transB | [hipblasOperation_t] specifies the form of op( B ) |
| [in] | m | [int] matrix dimension m. |
| [in] | n | [int] matrix dimension n. |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. |
| [in] | AP | device pointer to the first matrix A_0 on the GPU. Each A_i is of dimension ( lda, k ), where k is m when transA == HIPBLAS_OP_N and is n when transA == HIPBLAS_OP_T. |
| [in] | lda | [int] specifies the leading dimension of A. |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | beta | device pointer or host pointer specifying the scalar beta. |
| [in] | BP | pointer to the first matrix B_0 on the GPU. Each B_i is of dimension ( ldb, k ), where k is m when transB == HIPBLAS_OP_N and is n when transB == HIPBLAS_OP_T. |
| [in] | ldb | [int] specifies the leading dimension of B. |
| [in] | strideB | [hipblasStride] stride from the start of one matrix (B_i) and the next one (B_i+1) |
| [in,out] | CP | pointer to the first matrix C_0 on the GPU. Each C_i is of dimension ( ldc, n ). |
| [in] | ldc | [int] specifies the leading dimension of C. |
| [in] | strideC | [hipblasStride] stride from the start of one matrix (C_i) and the next one (C_i+1) |
| [in] | batchCount | [int] number of instances i in the batch. |
| hipblasStatus_t hipblasCgels | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | trans, | ||
| const int | m, | ||
| const int | n, | ||
| const int | nrhs, | ||
| hipblasComplex * | A, | ||
| const int | lda, | ||
| hipblasComplex * | B, | ||
| const int | ldb, | ||
| int * | info, | ||
| int * | deviceInfo | ||
| ) |
GELS solves an overdetermined (or underdetermined) linear system defined by an m-by-n matrix A, and a corresponding matrix B, using the QR factorization computed by GEQRF (or the LQ factorization computed by "GELQF").
Depending on the value of trans, the problem solved by this function is either of the form
\[ \begin{array}{cl} A X = B & \: \text{not transposed, or}\\ A' X = B & \: \text{transposed if real, or conjugate transposed if complex} \end{array} \]
If m >= n (or m < n in the case of transpose/conjugate transpose), the system is overdetermined and a least-squares solution approximating X is found by minimizing
\[ || B - A X || \quad \text{(or} \: || B - A' X ||\text{)} \]
If m < n (or m >= n in the case of transpose/conjugate transpose), the system is underdetermined and a unique solution for X is chosen such that \(|| X ||\) is minimal.
| [in] | handle | hipblasHandle_t. |
| [in] | trans | hipblasOperation_t. Specifies the form of the system of equations. |
| [in] | m | int. m >= 0. The number of rows of matrix A. |
| [in] | n | int. n >= 0. The number of columns of matrix A. |
| [in] | nrhs | int. nrhs >= 0. The number of columns of matrices B and X; i.e., the columns on the right hand side. |
| [in,out] | A | pointer to type. Array on the GPU of dimension lda*n. On entry, the matrix A. On exit, the QR (or LQ) factorization of A as returned by "GEQRF" (or "GELQF"). |
| [in] | lda | int. lda >= m. Specifies the leading dimension of matrix A. |
| [in,out] | B | pointer to type. Array on the GPU of dimension ldb*nrhs. On entry, the matrix B. On exit, when info = 0, B is overwritten by the solution vectors (and the residuals in the overdetermined cases) stored as columns. |
| [in] | ldb | int. ldb >= max(m,n). Specifies the leading dimension of matrix B. |
| [out] | info | pointer to an int on the host. If info = 0, successful exit. If info = j < 0, the argument at position -j is invalid. |
| [out] | deviceInfo | pointer to int on the GPU. If info = 0, successful exit. If info = i > 0, the solution could not be computed because input matrix A is rank deficient; the i-th diagonal element of its triangular factor is zero. |
| hipblasStatus_t hipblasCgelsBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | trans, | ||
| const int | m, | ||
| const int | n, | ||
| const int | nrhs, | ||
| hipblasComplex *const | A[], | ||
| const int | lda, | ||
| hipblasComplex *const | B[], | ||
| const int | ldb, | ||
| int * | info, | ||
| int * | deviceInfo, | ||
| const int | batchCount | ||
| ) |
gelsBatched solves a batch of overdetermined (or underdetermined) linear systems defined by a set of m-by-n matrices \(A_j\), and corresponding matrices \(B_j\), using the QR factorizations computed by "GEQRF_BATCHED" (or the LQ factorizations computed by "GELQF_BATCHED").
For each instance in the batch, depending on the value of trans, the problem solved by this function is either of the form
\[ \begin{array}{cl} A_j X_j = B_j & \: \text{not transposed, or}\\ A_j' X_j = B_j & \: \text{transposed if real, or conjugate transposed if complex} \end{array} \]
If m >= n (or m < n in the case of transpose/conjugate transpose), the system is overdetermined and a least-squares solution approximating X_j is found by minimizing
\[ || B_j - A_j X_j || \quad \text{(or} \: || B_j - A_j' X_j ||\text{)} \]
If m < n (or m >= n in the case of transpose/conjugate transpose), the system is underdetermined and a unique solution for X_j is chosen such that \(|| X_j ||\) is minimal.
| [in] | handle | hipblasHandle_t. |
| [in] | trans | hipblasOperation_t. Specifies the form of the system of equations. |
| [in] | m | int. m >= 0. The number of rows of all matrices A_j in the batch. |
| [in] | n | int. n >= 0. The number of columns of all matrices A_j in the batch. |
| [in] | nrhs | int. nrhs >= 0. The number of columns of all matrices B_j and X_j in the batch; i.e., the columns on the right hand side. |
| [in,out] | A | array of pointer to type. Each pointer points to an array on the GPU of dimension lda*n. On entry, the matrices A_j. On exit, the QR (or LQ) factorizations of A_j as returned by "GEQRF_BATCHED" (or "GELQF_BATCHED"). |
| [in] | lda | int. lda >= m. Specifies the leading dimension of matrices A_j. |
| [in,out] | B | array of pointer to type. Each pointer points to an array on the GPU of dimension ldb*nrhs. On entry, the matrices B_j. On exit, when info[j] = 0, B_j is overwritten by the solution vectors (and the residuals in the overdetermined cases) stored as columns. |
| [in] | ldb | int. ldb >= max(m,n). Specifies the leading dimension of matrices B_j. |
| [out] | info | pointer to an int on the host. If info = 0, successful exit. If info = j < 0, the argument at position -j is invalid. |
| [out] | deviceInfo | pointer to int. Array of batchCount integers on the GPU. If deviceInfo[j] = 0, successful exit for solution of A_j. If deviceInfo[j] = i > 0, the solution of A_j could not be computed because input matrix A_j is rank deficient; the i-th diagonal element of its triangular factor is zero. |
| [in] | batchCount | int. batchCount >= 0. Number of matrices in the batch. |
| hipblasStatus_t hipblasCgelsStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | trans, | ||
| const int | m, | ||
| const int | n, | ||
| const int | nrhs, | ||
| hipblasComplex * | A, | ||
| const int | lda, | ||
| const hipblasStride | strideA, | ||
| hipblasComplex * | B, | ||
| const int | ldb, | ||
| const hipblasStride | strideB, | ||
| int * | info, | ||
| int * | deviceInfo, | ||
| const int | batch_count | ||
| ) |
gelsStridedBatched solves a batch of overdetermined (or underdetermined) linear systems defined by a set of m-by-n matrices \(A_j\), and corresponding matrices \(B_j\), using the QR factorizations computed by "GEQRF_STRIDED_BATCHED" (or the LQ factorizations computed by "GELQF_STRIDED_BATCHED").
For each instance in the batch, depending on the value of trans, the problem solved by this function is either of the form
\[ \begin{array}{cl} A_j X_j = B_j & \: \text{not transposed, or}\\ A_j' X_j = B_j & \: \text{transposed if real, or conjugate transposed if complex} \end{array} \]
If m >= n (or m < n in the case of transpose/conjugate transpose), the system is overdetermined and a least-squares solution approximating X_j is found by minimizing
\[ || B_j - A_j X_j || \quad \text{(or} \: || B_j - A_j' X_j ||\text{)} \]
If m < n (or m >= n in the case of transpose/conjugate transpose), the system is underdetermined and a unique solution for X_j is chosen such that \(|| X_j ||\) is minimal.
| [in] | handle | hipblasHandle_t. |
| [in] | trans | hipblasOperation_t. Specifies the form of the system of equations. |
| [in] | m | int. m >= 0. The number of rows of all matrices A_j in the batch. |
| [in] | n | int. n >= 0. The number of columns of all matrices A_j in the batch. |
| [in] | nrhs | int. nrhs >= 0. The number of columns of all matrices B_j and X_j in the batch; i.e., the columns on the right hand side. |
| [in,out] | A | pointer to type. Array on the GPU (the size depends on the value of strideA). On entry, the matrices A_j. On exit, the QR (or LQ) factorizations of A_j as returned by "GEQRF_STRIDED_BATCHED" (or "GELQF_STRIDED_BATCHED"). |
| [in] | lda | int. lda >= m. Specifies the leading dimension of matrices A_j. |
| [in] | strideA | hipblasStride. Stride from the start of one matrix A_j to the next one A_(j+1). There is no restriction for the value of strideA. Normal use case is strideA >= lda*n |
| [in,out] | B | pointer to type. Array on the GPU (the size depends on the value of strideB). On entry, the matrices B_j. On exit, when info[j] = 0, each B_j is overwritten by the solution vectors (and the residuals in the overdetermined cases) stored as columns. |
| [in] | ldb | int. ldb >= max(m,n). Specifies the leading dimension of matrices B_j. |
| [in] | strideB | hipblasStride. Stride from the start of one matrix B_j to the next one B_(j+1). There is no restriction for the value of strideB. Normal use case is strideB >= ldb*nrhs |
| [out] | info | pointer to an int on the host. If info = 0, successful exit. If info = j < 0, the argument at position -j is invalid. |
| [out] | deviceInfo | pointer to int. Array of batchCount integers on the GPU. If deviceInfo[j] = 0, successful exit for solution of A_j. If deviceInfo[j] = i > 0, the solution of A_j could not be computed because input matrix A_j is rank deficient; the i-th diagonal element of its triangular factor is zero. |
| [in] | batchCount | int. batchCount >= 0. Number of matrices in the batch. |
| hipblasStatus_t hipblasCgemm | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | transA, | ||
| hipblasOperation_t | transB, | ||
| int | m, | ||
| int | n, | ||
| int | k, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| const hipblasComplex * | BP, | ||
| int | ldb, | ||
| const hipblasComplex * | beta, | ||
| hipblasComplex * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
gemm performs one of the matrix-matrix operations
C = alpha*op( A )*op( B ) + beta*C,
where op( X ) is one of
op( X ) = X or op( X ) = X**T or op( X ) = X**H,
alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
| [in] | handle | [hipblasHandle_t] . |
| [in] | transA | [hipblasOperation_t] specifies the form of op( A ) |
| [in] | transB | [hipblasOperation_t] specifies the form of op( B ) |
| [in] | m | [int] number or rows of matrices op( A ) and C |
| [in] | n | [int] number of columns of matrices op( B ) and C |
| [in] | k | [int] number of columns of matrix op( A ) and number of rows of matrix op( B ) |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. |
| [in] | AP | device pointer storing matrix A. |
| [in] | lda | [int] specifies the leading dimension of A. |
| [in] | BP | device pointer storing matrix B. |
| [in] | ldb | [int] specifies the leading dimension of B. |
| [in] | beta | device pointer or host pointer specifying the scalar beta. |
| [in,out] | CP | device pointer storing matrix C on the GPU. |
| [in] | ldc | [int] specifies the leading dimension of C. |
| hipblasStatus_t hipblasCgemmBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | transA, | ||
| hipblasOperation_t | transB, | ||
| int | m, | ||
| int | n, | ||
| int | k, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex *const | AP[], | ||
| int | lda, | ||
| const hipblasComplex *const | BP[], | ||
| int | ldb, | ||
| const hipblasComplex * | beta, | ||
| hipblasComplex *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
gemmBatched performs one of the batched matrix-matrix operations C_i = alpha*op( A_i )*op( B_i ) + beta*C_i, for i = 1, ..., batchCount. where op( X ) is one of op( X ) = X or op( X ) = X**T or op( X ) = X**H, alpha and beta are scalars, and A, B and C are strided batched matrices, with op( A ) an m by k by batchCount strided_batched matrix, op( B ) an k by n by batchCount strided_batched matrix and C an m by n by batchCount strided_batched matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | transA | [hipblasOperation_t] specifies the form of op( A ) |
| [in] | transB | [hipblasOperation_t] specifies the form of op( B ) |
| [in] | m | [int] matrix dimention m. |
| [in] | n | [int] matrix dimention n. |
| [in] | k | [int] matrix dimention k. |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. |
| [in] | AP | device array of device pointers storing each matrix A_i. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | BP | device array of device pointers storing each matrix B_i. |
| [in] | ldb | [int] specifies the leading dimension of each B_i. |
| [in] | beta | device pointer or host pointer specifying the scalar beta. |
| [in,out] | CP | device array of device pointers storing each matrix C_i. |
| [in] | ldc | [int] specifies the leading dimension of each C_i. |
| [in] | batchCount | [int] number of gemm operations in the batch |
| hipblasStatus_t hipblasCgemmStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | transA, | ||
| hipblasOperation_t | transB, | ||
| int | m, | ||
| int | n, | ||
| int | k, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| long long | strideA, | ||
| const hipblasComplex * | BP, | ||
| int | ldb, | ||
| long long | strideB, | ||
| const hipblasComplex * | beta, | ||
| hipblasComplex * | CP, | ||
| int | ldc, | ||
| long long | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
gemmStridedBatched performs one of the strided batched matrix-matrix operations
C_i = alpha*op( A_i )*op( B_i ) + beta*C_i, for i = 1, ..., batchCount.
where op( X ) is one of
op( X ) = X or op( X ) = X**T or op( X ) = X**H,
alpha and beta are scalars, and A, B and C are strided batched matrices, with op( A ) an m by k by batchCount strided_batched matrix, op( B ) an k by n by batchCount strided_batched matrix and C an m by n by batchCount strided_batched matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | transA | [hipblasOperation_t] specifies the form of op( A ) |
| [in] | transB | [hipblasOperation_t] specifies the form of op( B ) |
| [in] | m | [int] matrix dimention m. |
| [in] | n | [int] matrix dimention n. |
| [in] | k | [int] matrix dimention k. |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. |
| [in] | AP | device pointer pointing to the first matrix A_1. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | strideA | [hipblasStride] stride from the start of one A_i matrix to the next A_(i + 1). |
| [in] | BP | device pointer pointing to the first matrix B_1. |
| [in] | ldb | [int] specifies the leading dimension of each B_i. |
| [in] | strideB | [hipblasStride] stride from the start of one B_i matrix to the next B_(i + 1). |
| [in] | beta | device pointer or host pointer specifying the scalar beta. |
| [in,out] | CP | device pointer pointing to the first matrix C_1. |
| [in] | ldc | [int] specifies the leading dimension of each C_i. |
| [in] | strideC | [hipblasStride] stride from the start of one C_i matrix to the next C_(i + 1). |
| [in] | batchCount | [int] number of gemm operatons in the batch |
| hipblasStatus_t hipblasCgemv | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | trans, | ||
| int | m, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| const hipblasComplex * | beta, | ||
| hipblasComplex * | y, | ||
| int | incy | ||
| ) |
BLAS Level 2 API.
gemv performs one of the matrix-vector operations
y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y, or y := alpha*A**H*x + beta*y,
where alpha and beta are scalars, x and y are vectors and A is an m by n matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | trans | [hipblasOperation_t] indicates whether matrix A is tranposed (conjugated) or not |
| [in] | m | [int] number of rows of matrix A |
| [in] | n | [int] number of columns of matrix A |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device pointer storing matrix A. |
| [in] | lda | [int] specifies the leading dimension of A. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| hipblasStatus_t hipblasCgemvBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | trans, | ||
| int | m, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex *const | AP[], | ||
| int | lda, | ||
| const hipblasComplex *const | x[], | ||
| int | incx, | ||
| const hipblasComplex * | beta, | ||
| hipblasComplex *const | y[], | ||
| int | incy, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
gemvBatched performs a batch of matrix-vector operations
y_i := alpha*A_i*x_i + beta*y_i, or y_i := alpha*A_i**T*x_i + beta*y_i, or y_i := alpha*A_i**H*x_i + beta*y_i,
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha and beta are scalars, x_i and y_i are vectors and A_i is an m by n matrix, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | trans | [hipblasOperation_t] indicates whether matrices A_i are tranposed (conjugated) or not |
| [in] | m | [int] number of rows of each matrix A_i |
| [in] | n | [int] number of columns of each matrix A_i |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device array of device pointers storing each matrix A_i. |
| [in] | lda | [int] specifies the leading dimension of each matrix A_i. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each vector x_i. |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each vector y_i. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasCgemvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | transA, | ||
| int | m, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const hipblasComplex * | beta, | ||
| hipblasComplex * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
gemvStridedBatched performs a batch of matrix-vector operations
y_i := alpha*A_i*x_i + beta*y_i, or y_i := alpha*A_i**T*x_i + beta*y_i, or y_i := alpha*A_i**H*x_i + beta*y_i,
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha and beta are scalars, x_i and y_i are vectors and A_i is an m by n matrix, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | transA | [hipblasOperation_t] indicates whether matrices A_i are tranposed (conjugated) or not |
| [in] | m | [int] number of rows of matrices A_i |
| [in] | n | [int] number of columns of matrices A_i |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device pointer to the first matrix (A_1) in the batch. |
| [in] | lda | [int] specifies the leading dimension of matrices A_i. |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | x | device pointer to the first vector (x_1) in the batch. |
| [in] | incx | [int] specifies the increment for the elements of vectors x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stridex, however the user should take care to ensure that stridex is of appropriate size. When trans equals HIPBLAS_OP_N this typically means stridex >= n * incx, otherwise stridex >= m * incx. |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device pointer to the first vector (y_1) in the batch. |
| [in] | incy | [int] specifies the increment for the elements of vectors y_i. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1). There are no restrictions placed on stridey, however the user should take care to ensure that stridey is of appropriate size. When trans equals HIPBLAS_OP_N this typically means stridey >= m * incy, otherwise stridey >= n * incy. stridey should be non zero. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasCgeqrf | ( | hipblasHandle_t | handle, |
| const int | m, | ||
| const int | n, | ||
| hipblasComplex * | A, | ||
| const int | lda, | ||
| hipblasComplex * | ipiv, | ||
| int * | info | ||
| ) |
SOLVER API.
geqrf computes a QR factorization of a general m-by-n matrix A.
The factorization has the form
\[ A = Q\left[\begin{array}{c} R\\ 0 \end{array}\right] \]
where R is upper triangular (upper trapezoidal if m < n), and Q is a m-by-m orthogonal/unitary matrix represented as the product of Householder matrices
\[ Q = H_1H_2\cdots H_k, \quad \text{with} \: k = \text{min}(m,n) \]
Each Householder matrix \(H_i\) is given by
\[ H_i = I - \text{ipiv}[i] \cdot v_i v_i' \]
where the first i-1 elements of the Householder vector \(v_i\) are zero, and \(v_i[i] = 1\).
| [in] | handle | hipblasHandle_t. |
| [in] | m | int. m >= 0. The number of rows of the matrix A. |
| [in] | n | int. n >= 0. The number of columns of the matrix A. |
| [in,out] | A | pointer to type. Array on the GPU of dimension lda*n. On entry, the m-by-n matrix to be factored. On exit, the elements on and above the diagonal contain the factor R; the elements below the diagonal are the last m - i elements of Householder vector v_i. |
| [in] | lda | int. lda >= m. Specifies the leading dimension of A. |
| [out] | ipiv | pointer to type. Array on the GPU of dimension min(m,n). The Householder scalars. |
| [out] | info | pointer to a int on the host. If info = 0, successful exit. If info = j < 0, the argument at position -j is invalid. |
| hipblasStatus_t hipblasCgeqrfBatched | ( | hipblasHandle_t | handle, |
| const int | m, | ||
| const int | n, | ||
| hipblasComplex *const | A[], | ||
| const int | lda, | ||
| hipblasComplex *const | ipiv[], | ||
| int * | info, | ||
| const int | batchCount | ||
| ) |
SOLVER API.
geqrfBatched computes the QR factorization of a batch of general m-by-n matrices.
The factorization of matrix \(A_i\) in the batch has the form
\[ A_i = Q_i\left[\begin{array}{c} R_i\\ 0 \end{array}\right] \]
where \(R_i\) is upper triangular (upper trapezoidal if m < n), and \(Q_i\) is a m-by-m orthogonal/unitary matrix represented as the product of Householder matrices
\[ Q_i = H_{i_1}H_{i_2}\cdots H_{i_k}, \quad \text{with} \: k = \text{min}(m,n) \]
Each Householder matrix \(H_{i_j}\) is given by
\[ H_{i_j} = I - \text{ipiv}_i[j] \cdot v_{i_j} v_{i_j}' \]
where the first j-1 elements of Householder vector \(v_{i_j}\) are zero, and \(v_{i_j}[j] = 1\).
| [in] | handle | hipblasHandle_t. |
| [in] | m | int. m >= 0. The number of rows of all the matrices A_i in the batch. |
| [in] | n | int. n >= 0. The number of columns of all the matrices A_i in the batch. |
| [in,out] | A | Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n. On entry, the m-by-n matrices A_i to be factored. On exit, the elements on and above the diagonal contain the factor R_i. The elements below the diagonal are the last m - j elements of Householder vector v_(i_j). |
| [in] | lda | int. lda >= m. Specifies the leading dimension of matrices A_i. |
| [out] | ipiv | array of pointers to type. Each pointer points to an array on the GPU of dimension min(m, n). Contains the vectors ipiv_i of corresponding Householder scalars. |
| [out] | info | pointer to a int on the host. If info = 0, successful exit. If info = j < 0, the argument at position -j is invalid. |
| [in] | batchCount | int. batchCount >= 0. Number of matrices in the batch. |
| hipblasStatus_t hipblasCgeqrfStridedBatched | ( | hipblasHandle_t | handle, |
| const int | m, | ||
| const int | n, | ||
| hipblasComplex * | A, | ||
| const int | lda, | ||
| const hipblasStride | strideA, | ||
| hipblasComplex * | ipiv, | ||
| const hipblasStride | strideP, | ||
| int * | info, | ||
| const int | batchCount | ||
| ) |
SOLVER API.
geqrfStridedBatched computes the QR factorization of a batch of general m-by-n matrices.
The factorization of matrix \(A_i\) in the batch has the form
\[ A_i = Q_i\left[\begin{array}{c} R_i\\ 0 \end{array}\right] \]
where \(R_i\) is upper triangular (upper trapezoidal if m < n), and \(Q_i\) is a m-by-m orthogonal/unitary matrix represented as the product of Householder matrices
\[ Q_i = H_{i_1}H_{i_2}\cdots H_{i_k}, \quad \text{with} \: k = \text{min}(m,n) \]
Each Householder matrix \(H_{i_j}\) is given by
\[ H_{i_j} = I - \text{ipiv}_j[j] \cdot v_{i_j} v_{i_j}' \]
where the first j-1 elements of Householder vector \(v_{i_j}\) are zero, and \(v_{i_j}[j] = 1\).
| [in] | handle | hipblasHandle_t. |
| [in] | m | int. m >= 0. The number of rows of all the matrices A_i in the batch. |
| [in] | n | int. n >= 0. The number of columns of all the matrices A_i in the batch. |
| [in,out] | A | pointer to type. Array on the GPU (the size depends on the value of strideA). On entry, the m-by-n matrices A_i to be factored. On exit, the elements on and above the diagonal contain the factor R_i. The elements below the diagonal are the last m - j elements of Householder vector v_(i_j). |
| [in] | lda | int. lda >= m. Specifies the leading dimension of matrices A_i. |
| [in] | strideA | hipblasStride. Stride from the start of one matrix A_i to the next one A_(i+1). There is no restriction for the value of strideA. Normal use case is strideA >= lda*n. |
| [out] | ipiv | pointer to type. Array on the GPU (the size depends on the value of strideP). Contains the vectors ipiv_i of corresponding Householder scalars. |
| [in] | strideP | hipblasStride. Stride from the start of one vector ipiv_i to the next one ipiv_(i+1). There is no restriction for the value of strideP. Normal use is strideP >= min(m,n). |
| [out] | info | pointer to a int on the host. If info = 0, successful exit. If info = j < 0, the argument at position -j is invalid. |
| [in] | batchCount | int. batchCount >= 0. Number of matrices in the batch. |
| hipblasStatus_t hipblasCgerc | ( | hipblasHandle_t | handle, |
| int | m, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| const hipblasComplex * | y, | ||
| int | incy, | ||
| hipblasComplex * | AP, | ||
| int | lda | ||
| ) |
BLAS Level 2 API.
ger,geru,gerc performs the matrix-vector operations
A := A + alpha*x*y**T , OR A := A + alpha*x*y**H for gerc
where alpha is a scalar, x and y are vectors, and A is an m by n matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | m | [int] the number of rows of the matrix A. |
| [in] | n | [int] the number of columns of the matrix A. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in,out] | AP | device pointer storing matrix A. |
| [in] | lda | [int] specifies the leading dimension of A. |
| hipblasStatus_t hipblasCgercBatched | ( | hipblasHandle_t | handle, |
| int | m, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex *const | x[], | ||
| int | incx, | ||
| const hipblasComplex *const | y[], | ||
| int | incy, | ||
| hipblasComplex *const | AP[], | ||
| int | lda, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
gerBatched,geruBatched,gercBatched performs a batch of the matrix-vector operations
A := A + alpha*x*y**T , OR A := A + alpha*x*y**H for gerc
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha is a scalar, x_i and y_i are vectors and A_i is an m by n matrix, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | m | [int] the number of rows of each matrix A_i. |
| [in] | n | [int] the number of columns of eaceh matrix A_i. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each vector x_i. |
| [in] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each vector y_i. |
| [in,out] | AP | device array of device pointers storing each matrix A_i. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasCgercStridedBatched | ( | hipblasHandle_t | handle, |
| int | m, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const hipblasComplex * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| hipblasComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
gerStridedBatched,geruStridedBatched,gercStridedBatched performs the matrix-vector operations
A_i := A_i + alpha*x_i*y_i**T, OR A_i := A_i + alpha*x_i*y_i**H for gerc
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha is a scalar, x_i and y_i are vectors and A_i is an m by n matrix, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | m | [int] the number of rows of each matrix A_i. |
| [in] | n | [int] the number of columns of each matrix A_i. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer to the first vector (x_1) in the batch. |
| [in] | incx | [int] specifies the increments for the elements of each vector x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stridex, however the user should take care to ensure that stridex is of appropriate size, for a typical case this means stridex >= m * incx. |
| [in,out] | y | device pointer to the first vector (y_1) in the batch. |
| [in] | incy | [int] specifies the increment for the elements of each vector y_i. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1). There are no restrictions placed on stridey, however the user should take care to ensure that stridey is of appropriate size, for a typical case this means stridey >= n * incy. |
| [in,out] | AP | device pointer to the first matrix (A_1) in the batch. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasCgeru | ( | hipblasHandle_t | handle, |
| int | m, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| const hipblasComplex * | y, | ||
| int | incy, | ||
| hipblasComplex * | AP, | ||
| int | lda | ||
| ) |
BLAS Level 2 API.
ger,geru,gerc performs the matrix-vector operations
A := A + alpha*x*y**T , OR A := A + alpha*x*y**H for gerc
where alpha is a scalar, x and y are vectors, and A is an m by n matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | m | [int] the number of rows of the matrix A. |
| [in] | n | [int] the number of columns of the matrix A. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in,out] | AP | device pointer storing matrix A. |
| [in] | lda | [int] specifies the leading dimension of A. |
| hipblasStatus_t hipblasCgeruBatched | ( | hipblasHandle_t | handle, |
| int | m, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex *const | x[], | ||
| int | incx, | ||
| const hipblasComplex *const | y[], | ||
| int | incy, | ||
| hipblasComplex *const | AP[], | ||
| int | lda, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
gerBatched,geruBatched,gercBatched performs a batch of the matrix-vector operations
A := A + alpha*x*y**T , OR A := A + alpha*x*y**H for gerc
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha is a scalar, x_i and y_i are vectors and A_i is an m by n matrix, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | m | [int] the number of rows of each matrix A_i. |
| [in] | n | [int] the number of columns of eaceh matrix A_i. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each vector x_i. |
| [in] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each vector y_i. |
| [in,out] | AP | device array of device pointers storing each matrix A_i. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasCgeruStridedBatched | ( | hipblasHandle_t | handle, |
| int | m, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const hipblasComplex * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| hipblasComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
gerStridedBatched,geruStridedBatched,gercStridedBatched performs the matrix-vector operations
A_i := A_i + alpha*x_i*y_i**T, OR A_i := A_i + alpha*x_i*y_i**H for gerc
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha is a scalar, x_i and y_i are vectors and A_i is an m by n matrix, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | m | [int] the number of rows of each matrix A_i. |
| [in] | n | [int] the number of columns of each matrix A_i. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer to the first vector (x_1) in the batch. |
| [in] | incx | [int] specifies the increments for the elements of each vector x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stridex, however the user should take care to ensure that stridex is of appropriate size, for a typical case this means stridex >= m * incx. |
| [in,out] | y | device pointer to the first vector (y_1) in the batch. |
| [in] | incy | [int] specifies the increment for the elements of each vector y_i. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1). There are no restrictions placed on stridey, however the user should take care to ensure that stridey is of appropriate size, for a typical case this means stridey >= n * incy. |
| [in,out] | AP | device pointer to the first matrix (A_1) in the batch. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasCgetrf | ( | hipblasHandle_t | handle, |
| const int | n, | ||
| hipblasComplex * | A, | ||
| const int | lda, | ||
| int * | ipiv, | ||
| int * | info | ||
| ) |
SOLVER API.
getrf computes the LU factorization of a general n-by-n matrix A using partial pivoting with row interchanges. The LU factorization can be done without pivoting if ipiv is passed as a nullptr.
In the case that ipiv is not null, the factorization has the form:
\[ A = PLU \]
where P is a permutation matrix, L is lower triangular with unit diagonal elements, and U is upper triangular.
In the case that ipiv is null, the factorization is done without pivoting:
\[ A = LU \]
| [in] | handle | hipblasHandle_t. |
| [in] | n | int. n >= 0. The number of columns and rows of the matrix A. |
| [in,out] | A | pointer to type. Array on the GPU of dimension lda*n. On entry, the n-by-n matrix A to be factored. On exit, the factors L and U from the factorization. The unit diagonal elements of L are not stored. |
| [in] | lda | int. lda >= n. Specifies the leading dimension of A. |
| [out] | ipiv | pointer to int. Array on the GPU of dimension n. The vector of pivot indices. Elements of ipiv are 1-based indices. For 1 <= i <= n, the row i of the matrix was interchanged with row ipiv[i]. Matrix P of the factorization can be derived from ipiv. The factorization here can be done without pivoting if ipiv is passed in as a nullptr. |
| [out] | info | pointer to a int on the GPU. If info = 0, successful exit. If info = j > 0, U is singular. U[j,j] is the first zero pivot. |
| hipblasStatus_t hipblasCgetrfBatched | ( | hipblasHandle_t | handle, |
| const int | n, | ||
| hipblasComplex *const | A[], | ||
| const int | lda, | ||
| int * | ipiv, | ||
| int * | info, | ||
| const int | batchCount | ||
| ) |
SOLVER API.
getrfBatched computes the LU factorization of a batch of general n-by-n matrices using partial pivoting with row interchanges. The LU factorization can be done without pivoting if ipiv is passed as a nullptr.
In the case that ipiv is not null, the factorization of matrix \(A_i\) in the batch has the form:
\[ A_i = P_iL_iU_i \]
where \(P_i\) is a permutation matrix, \(L_i\) is lower triangular with unit diagonal elements, and \(U_i\) is upper triangular.
In the case that ipiv is null, the factorization is done without pivoting:
\[ A_i = L_iU_i \]
| [in] | handle | hipblasHandle_t. |
| [in] | n | int. n >= 0. The number of columns and rows of all matrices A_i in the batch. |
| [in,out] | A | array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n. On entry, the n-by-n matrices A_i to be factored. On exit, the factors L_i and U_i from the factorizations. The unit diagonal elements of L_i are not stored. |
| [in] | lda | int. lda >= n. Specifies the leading dimension of matrices A_i. |
| [out] | ipiv | pointer to int. Array on the GPU. Contains the vectors of pivot indices ipiv_i (corresponding to A_i). Dimension of ipiv_i is n. Elements of ipiv_i are 1-based indices. For each instance A_i in the batch and for 1 <= j <= n, the row j of the matrix A_i was interchanged with row ipiv_i[j]. Matrix P_i of the factorization can be derived from ipiv_i. The factorization here can be done without pivoting if ipiv is passed in as a nullptr. |
| [out] | info | pointer to int. Array of batchCount integers on the GPU. If info[i] = 0, successful exit for factorization of A_i. If info[i] = j > 0, U_i is singular. U_i[j,j] is the first zero pivot. |
| [in] | batchCount | int. batchCount >= 0. Number of matrices in the batch. |
| hipblasStatus_t hipblasCgetrfStridedBatched | ( | hipblasHandle_t | handle, |
| const int | n, | ||
| hipblasComplex * | A, | ||
| const int | lda, | ||
| const hipblasStride | strideA, | ||
| int * | ipiv, | ||
| const hipblasStride | strideP, | ||
| int * | info, | ||
| const int | batchCount | ||
| ) |
SOLVER API.
getrfStridedBatched computes the LU factorization of a batch of general n-by-n matrices using partial pivoting with row interchanges. The LU factorization can be done without pivoting if ipiv is passed as a nullptr.
In the case that ipiv is not null, the factorization of matrix \(A_i\) in the batch has the form:
\[ A_i = P_iL_iU_i \]
where \(P_i\) is a permutation matrix, \(L_i\) is lower triangular with unit diagonal elements, and \(U_i\) is upper triangular.
In the case that ipiv is null, the factorization is done without pivoting:
\[ A_i = L_iU_i \]
| [in] | handle | hipblasHandle_t. |
| [in] | n | int. n >= 0. The number of columns and rows of all matrices A_i in the batch. |
| [in,out] | A | pointer to type. Array on the GPU (the size depends on the value of strideA). On entry, the n-by-n matrices A_i to be factored. On exit, the factors L_i and U_i from the factorization. The unit diagonal elements of L_i are not stored. |
| [in] | lda | int. lda >= n. Specifies the leading dimension of matrices A_i. |
| [in] | strideA | hipblasStride. Stride from the start of one matrix A_i to the next one A_(i+1). There is no restriction for the value of strideA. Normal use case is strideA >= lda*n |
| [out] | ipiv | pointer to int. Array on the GPU (the size depends on the value of strideP). Contains the vectors of pivots indices ipiv_i (corresponding to A_i). Dimension of ipiv_i is n. Elements of ipiv_i are 1-based indices. For each instance A_i in the batch and for 1 <= j <= n, the row j of the matrix A_i was interchanged with row ipiv_i[j]. Matrix P_i of the factorization can be derived from ipiv_i. The factorization here can be done without pivoting if ipiv is passed in as a nullptr. |
| [in] | strideP | hipblasStride. Stride from the start of one vector ipiv_i to the next one ipiv_(i+1). There is no restriction for the value of strideP. Normal use case is strideP >= n. |
| [out] | info | pointer to int. Array of batchCount integers on the GPU. If info[i] = 0, successful exit for factorization of A_i. If info[i] = j > 0, U_i is singular. U_i[j,j] is the first zero pivot. |
| [in] | batchCount | int. batchCount >= 0. Number of matrices in the batch. |
| hipblasStatus_t hipblasCgetriBatched | ( | hipblasHandle_t | handle, |
| const int | n, | ||
| hipblasComplex *const | A[], | ||
| const int | lda, | ||
| int * | ipiv, | ||
| hipblasComplex *const | C[], | ||
| const int | ldc, | ||
| int * | info, | ||
| const int | batchCount | ||
| ) |
SOLVER API.
getriBatched computes the inverse \(C_i = A_i^{-1}\) of a batch of general n-by-n matrices \(A_i\).
The inverse is computed by solving the linear system
\[ A_i C_i = I \]
where I is the identity matrix, and \(A_i\) is factorized as \(A_i = P_i L_i U_i\) as given by getrfBatched.
| [in] | handle | hipblasHandle_t. |
| [in] | n | int. n >= 0. The number of rows and columns of all matrices A_i in the batch. |
| [in] | A | array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n. The factors L_i and U_i of the factorization A_i = P_i*L_i*U_i returned by getrfBatched. |
| [in] | lda | int. lda >= n. Specifies the leading dimension of matrices A_i. |
| [in] | ipiv | pointer to int. Array on the GPU (the size depends on the value of strideP). The pivot indices returned by getrfBatched. ipiv can be passed in as a nullptr, this will assume that getrfBatched was called without partial pivoting. |
| [out] | C | array of pointers to type. Each pointer points to an array on the GPU of dimension ldc*n. If info[i] = 0, the inverse of matrices A_i. Otherwise, undefined. |
| [in] | ldc | int. ldc >= n. Specifies the leading dimension of C_i. |
| [out] | info | pointer to int. Array of batchCount integers on the GPU. If info[i] = 0, successful exit for inversion of A_i. If info[i] = j > 0, U_i is singular. U_i[j,j] is the first zero pivot. |
| [in] | batchCount | int. batchCount >= 0. Number of matrices in the batch. |
| hipblasStatus_t hipblasCgetrs | ( | hipblasHandle_t | handle, |
| const hipblasOperation_t | trans, | ||
| const int | n, | ||
| const int | nrhs, | ||
| hipblasComplex * | A, | ||
| const int | lda, | ||
| const int * | ipiv, | ||
| hipblasComplex * | B, | ||
| const int | ldb, | ||
| int * | info | ||
| ) |
SOLVER API.
getrs solves a system of n linear equations on n variables in its factorized form.
It solves one of the following systems, depending on the value of trans:
\[ \begin{array}{cl} A X = B & \: \text{not transposed,}\\ A^T X = B & \: \text{transposed, or}\\ A^H X = B & \: \text{conjugate transposed.} \end{array} \]
Matrix A is defined by its triangular factors as returned by getrf.
| [in] | handle | hipblasHandle_t. |
| [in] | trans | hipblasOperation_t. Specifies the form of the system of equations. |
| [in] | n | int. n >= 0. The order of the system, i.e. the number of columns and rows of A. |
| [in] | nrhs | int. nrhs >= 0. The number of right hand sides, i.e., the number of columns of the matrix B. |
| [in] | A | pointer to type. Array on the GPU of dimension lda*n. The factors L and U of the factorization A = P*L*U returned by getrf. |
| [in] | lda | int. lda >= n. The leading dimension of A. |
| [in] | ipiv | pointer to int. Array on the GPU of dimension n. The pivot indices returned by getrf. |
| [in,out] | B | pointer to type. Array on the GPU of dimension ldb*nrhs. On entry, the right hand side matrix B. On exit, the solution matrix X. |
| [in] | ldb | int. ldb >= n. The leading dimension of B. |
| [out] | info | pointer to a int on the host. If info = 0, successful exit. If info = j < 0, the argument at position -j is invalid. |
| hipblasStatus_t hipblasCgetrsBatched | ( | hipblasHandle_t | handle, |
| const hipblasOperation_t | trans, | ||
| const int | n, | ||
| const int | nrhs, | ||
| hipblasComplex *const | A[], | ||
| const int | lda, | ||
| const int * | ipiv, | ||
| hipblasComplex *const | B[], | ||
| const int | ldb, | ||
| int * | info, | ||
| const int | batchCount | ||
| ) |
SOLVER API.
getrsBatched solves a batch of systems of n linear equations on n variables in its factorized forms.
For each instance i in the batch, it solves one of the following systems, depending on the value of trans:
\[ \begin{array}{cl} A_i X_i = B_i & \: \text{not transposed,}\\ A_i^T X_i = B_i & \: \text{transposed, or}\\ A_i^H X_i = B_i & \: \text{conjugate transposed.} \end{array} \]
Matrix \(A_i\) is defined by its triangular factors as returned by getrfBatched.
| [in] | handle | hipblasHandle_t. |
| [in] | trans | hipblasOperation_t. Specifies the form of the system of equations of each instance in the batch. |
| [in] | n | int. n >= 0. The order of the system, i.e. the number of columns and rows of all A_i matrices. |
| [in] | nrhs | int. nrhs >= 0. The number of right hand sides, i.e., the number of columns of all the matrices B_i. |
| [in] | A | Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n. The factors L_i and U_i of the factorization A_i = P_i*L_i*U_i returned by getrfBatched. |
| [in] | lda | int. lda >= n. The leading dimension of matrices A_i. |
| [in] | ipiv | pointer to int. Array on the GPU. Contains the vectors ipiv_i of pivot indices returned by getrfBatched. |
| [in,out] | B | Array of pointers to type. Each pointer points to an array on the GPU of dimension ldb*nrhs. On entry, the right hand side matrices B_i. On exit, the solution matrix X_i of each system in the batch. |
| [in] | ldb | int. ldb >= n. The leading dimension of matrices B_i. |
| [out] | info | pointer to a int on the host. If info = 0, successful exit. If info = j < 0, the argument at position -j is invalid. |
| [in] | batchCount | int. batchCount >= 0. Number of instances (systems) in the batch. |
| hipblasStatus_t hipblasCgetrsStridedBatched | ( | hipblasHandle_t | handle, |
| const hipblasOperation_t | trans, | ||
| const int | n, | ||
| const int | nrhs, | ||
| hipblasComplex * | A, | ||
| const int | lda, | ||
| const hipblasStride | strideA, | ||
| const int * | ipiv, | ||
| const hipblasStride | strideP, | ||
| hipblasComplex * | B, | ||
| const int | ldb, | ||
| const hipblasStride | strideB, | ||
| int * | info, | ||
| const int | batchCount | ||
| ) |
SOLVER API.
getrsStridedBatched solves a batch of systems of n linear equations on n variables in its factorized forms.
For each instance i in the batch, it solves one of the following systems, depending on the value of trans:
\[ \begin{array}{cl} A_i X_i = B_i & \: \text{not transposed,}\\ A_i^T X_i = B_i & \: \text{transposed, or}\\ A_i^H X_i = B_i & \: \text{conjugate transposed.} \end{array} \]
Matrix \(A_i\) is defined by its triangular factors as returned by getrfStridedBatched.
| [in] | handle | hipblasHandle_t. |
| [in] | trans | hipblasOperation_t. Specifies the form of the system of equations of each instance in the batch. |
| [in] | n | int. n >= 0. The order of the system, i.e. the number of columns and rows of all A_i matrices. |
| [in] | nrhs | int. nrhs >= 0. The number of right hand sides, i.e., the number of columns of all the matrices B_i. |
| [in] | A | pointer to type. Array on the GPU (the size depends on the value of strideA). The factors L_i and U_i of the factorization A_i = P_i*L_i*U_i returned by getrfStridedBatched. |
| [in] | lda | int. lda >= n. The leading dimension of matrices A_i. |
| [in] | strideA | hipblasStride. Stride from the start of one matrix A_i to the next one A_(i+1). There is no restriction for the value of strideA. Normal use case is strideA >= lda*n. |
| [in] | ipiv | pointer to int. Array on the GPU (the size depends on the value of strideP). Contains the vectors ipiv_i of pivot indices returned by getrfStridedBatched. |
| [in] | strideP | hipblasStride. Stride from the start of one vector ipiv_i to the next one ipiv_(i+1). There is no restriction for the value of strideP. Normal use case is strideP >= n. |
| [in,out] | B | pointer to type. Array on the GPU (size depends on the value of strideB). On entry, the right hand side matrices B_i. On exit, the solution matrix X_i of each system in the batch. |
| [in] | ldb | int. ldb >= n. The leading dimension of matrices B_i. |
| [in] | strideB | hipblasStride. Stride from the start of one matrix B_i to the next one B_(i+1). There is no restriction for the value of strideB. Normal use case is strideB >= ldb*nrhs. |
| [out] | info | pointer to a int on the host. If info = 0, successful exit. If info = j < 0, the argument at position -j is invalid. |
| [in] | batchCount | int. batchCount >= 0. Number of instances (systems) in the batch. |
| hipblasStatus_t hipblasChbmv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| int | k, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| const hipblasComplex * | beta, | ||
| hipblasComplex * | y, | ||
| int | incy | ||
| ) |
BLAS Level 2 API.
hbmv performs the matrix-vector operations
y := alpha*A*x + beta*y
where alpha and beta are scalars, x and y are n element vectors and A is an n by n Hermitian band matrix, with k super-diagonals.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: The upper triangular part of A is being supplied. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of A is being supplied. |
| [in] | n | [int] the order of the matrix A. |
| [in] | k | [int] the number of super-diagonals of the matrix A. Must be >= 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device pointer storing matrix A. Of dimension (lda, n). if uplo == HIPBLAS_FILL_MODE_UPPER: The leading (k + 1) by n part of A must contain the upper triangular band part of the Hermitian matrix, with the leading diagonal in row (k + 1), the first super-diagonal on the RHS of row k, etc. The top left k by x triangle of A will not be referenced. Ex (upper, lda = n = 4, k = 1): A Represented matrix (0,0) (5,9) (6,8) (7,7) (1, 0) (5, 9) (0, 0) (0, 0) (1,0) (2,0) (3,0) (4,0) (5,-9) (2, 0) (6, 8) (0, 0) (0,0) (0,0) (0,0) (0,0) (0, 0) (6,-8) (3, 0) (7, 7) (0,0) (0,0) (0,0) (0,0) (0, 0) (0, 0) (7,-7) (4, 0) |
if uplo == HIPBLAS_FILL_MODE_LOWER: The leading (k + 1) by n part of A must contain the lower triangular band part of the Hermitian matrix, with the leading diagonal in row (1), the first sub-diagonal on the LHS of row 2, etc. The bottom right k by k triangle of A will not be referenced. Ex (lower, lda = 2, n = 4, k = 1): A Represented matrix (1,0) (2,0) (3,0) (4,0) (1, 0) (5,-9) (0, 0) (0, 0) (5,9) (6,8) (7,7) (0,0) (5, 9) (2, 0) (6,-8) (0, 0) (0, 0) (6, 8) (3, 0) (7,-7) (0, 0) (0, 0) (7, 7) (4, 0)
As a Hermitian matrix, the imaginary part of the main diagonal of A will not be referenced and is assumed to be == 0.
| [in] | lda | [int] specifies the leading dimension of A. must be >= k + 1 |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| hipblasStatus_t hipblasChbmvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| int | k, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex *const | AP[], | ||
| int | lda, | ||
| const hipblasComplex *const | x[], | ||
| int | incx, | ||
| const hipblasComplex * | beta, | ||
| hipblasComplex *const | y[], | ||
| int | incy, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
hbmvBatched performs one of the matrix-vector operations
y_i := alpha*A_i*x_i + beta*y_i
where alpha and beta are scalars, x_i and y_i are n element vectors and A_i is an n by n Hermitian band matrix with k super-diagonals, for each batch in i = [1, batchCount].
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is being supplied. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is being supplied. |
| [in] | n | [int] the order of each matrix A_i. |
| [in] | k | [int] the number of super-diagonals of each matrix A_i. Must be >= 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device array of device pointers storing each matrix_i A of dimension (lda, n). if uplo == HIPBLAS_FILL_MODE_UPPER: The leading (k + 1) by n part of each A_i must contain the upper triangular band part of the Hermitian matrix, with the leading diagonal in row (k + 1), the first super-diagonal on the RHS of row k, etc. The top left k by x triangle of each A_i will not be referenced. Ex (upper, lda = n = 4, k = 1): A Represented matrix (0,0) (5,9) (6,8) (7,7) (1, 0) (5, 9) (0, 0) (0, 0) (1,0) (2,0) (3,0) (4,0) (5,-9) (2, 0) (6, 8) (0, 0) (0,0) (0,0) (0,0) (0,0) (0, 0) (6,-8) (3, 0) (7, 7) (0,0) (0,0) (0,0) (0,0) (0, 0) (0, 0) (7,-7) (4, 0) |
if uplo == HIPBLAS_FILL_MODE_LOWER: The leading (k + 1) by n part of each A_i must contain the lower triangular band part of the Hermitian matrix, with the leading diagonal in row (1), the first sub-diagonal on the LHS of row 2, etc. The bottom right k by k triangle of each A_i will not be referenced. Ex (lower, lda = 2, n = 4, k = 1): A Represented matrix (1,0) (2,0) (3,0) (4,0) (1, 0) (5,-9) (0, 0) (0, 0) (5,9) (6,8) (7,7) (0,0) (5, 9) (2, 0) (6,-8) (0, 0) (0, 0) (6, 8) (3, 0) (7,-7) (0, 0) (0, 0) (7, 7) (4, 0)
As a Hermitian matrix, the imaginary part of the main diagonal of each A_i will not be referenced and is assumed to be == 0.
| [in] | lda | [int] specifies the leading dimension of each A_i. must be >= max(1, n) |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasChbmvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| int | k, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const hipblasComplex * | beta, | ||
| hipblasComplex * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
hbmvStridedBatched performs one of the matrix-vector operations
y_i := alpha*A_i*x_i + beta*y_i
where alpha and beta are scalars, x_i and y_i are n element vectors and A_i is an n by n Hermitian band matrix with k super-diagonals, for each batch in i = [1, batchCount].
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is being supplied. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is being supplied. |
| [in] | n | [int] the order of each matrix A_i. |
| [in] | k | [int] the number of super-diagonals of each matrix A_i. Must be >= 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device array pointing to the first matrix A_1. Each A_i is of dimension (lda, n). if uplo == HIPBLAS_FILL_MODE_UPPER: The leading (k + 1) by n part of each A_i must contain the upper triangular band part of the Hermitian matrix, with the leading diagonal in row (k + 1), the first super-diagonal on the RHS of row k, etc. The top left k by x triangle of each A_i will not be referenced. Ex (upper, lda = n = 4, k = 1): A Represented matrix (0,0) (5,9) (6,8) (7,7) (1, 0) (5, 9) (0, 0) (0, 0) (1,0) (2,0) (3,0) (4,0) (5,-9) (2, 0) (6, 8) (0, 0) (0,0) (0,0) (0,0) (0,0) (0, 0) (6,-8) (3, 0) (7, 7) (0,0) (0,0) (0,0) (0,0) (0, 0) (0, 0) (7,-7) (4, 0) |
if uplo == HIPBLAS_FILL_MODE_LOWER: The leading (k + 1) by n part of each A_i must contain the lower triangular band part of the Hermitian matrix, with the leading diagonal in row (1), the first sub-diagonal on the LHS of row 2, etc. The bottom right k by k triangle of each A_i will not be referenced. Ex (lower, lda = 2, n = 4, k = 1): A Represented matrix (1,0) (2,0) (3,0) (4,0) (1, 0) (5,-9) (0, 0) (0, 0) (5,9) (6,8) (7,7) (0,0) (5, 9) (2, 0) (6,-8) (0, 0) (0, 0) (6, 8) (3, 0) (7,-7) (0, 0) (0, 0) (7, 7) (4, 0)
As a Hermitian matrix, the imaginary part of the main diagonal of each A_i will not be referenced and is assumed to be == 0.
| [in] | lda | [int] specifies the leading dimension of each A_i. must be >= max(1, n) |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | x | device array pointing to the first vector y_1. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1) |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device array pointing to the first vector y_1. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasChemm | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| int | k, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| const hipblasComplex * | BP, | ||
| int | ldb, | ||
| const hipblasComplex * | beta, | ||
| hipblasComplex * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
hemm performs one of the matrix-matrix operations:
C := alpha*A*B + beta*C if side == HIPBLAS_SIDE_LEFT, C := alpha*B*A + beta*C if side == HIPBLAS_SIDE_RIGHT,
where alpha and beta are scalars, B and C are m by n matrices, and A is a Hermitian matrix stored as either upper or lower.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] HIPBLAS_SIDE_LEFT: C := alpha*A*B + beta*C HIPBLAS_SIDE_RIGHT: C := alpha*B*A + beta*C |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix |
| [in] | n | [int] n specifies the number of rows of B and C. n >= 0. |
| [in] | k | [int] n specifies the number of columns of B and C. k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A and B are not referenced. |
| [in] | AP | pointer storing matrix A on the GPU. A is m by m if side == HIPBLAS_SIDE_LEFT A is n by n if side == HIPBLAS_SIDE_RIGHT Only the upper/lower triangular part is accessed. The imaginary component of the diagonal elements is not used. |
| [in] | lda | [int] lda specifies the first dimension of A. if side = HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), otherwise lda >= max( 1, n ). |
| [in] | BP | pointer storing matrix B on the GPU. Matrix dimension is m by n |
| [in] | ldb | [int] ldb specifies the first dimension of B. ldb >= max( 1, m ) |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | pointer storing matrix C on the GPU. Matrix dimension is m by n |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, m ) |
| hipblasStatus_t hipblasChemmBatched | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| int | k, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex *const | AP[], | ||
| int | lda, | ||
| const hipblasComplex *const | BP[], | ||
| int | ldb, | ||
| const hipblasComplex * | beta, | ||
| hipblasComplex *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
hemmBatched performs a batch of the matrix-matrix operations:
C_i := alpha*A_i*B_i + beta*C_i if side == HIPBLAS_SIDE_LEFT, C_i := alpha*B_i*A_i + beta*C_i if side == HIPBLAS_SIDE_RIGHT,
where alpha and beta are scalars, B_i and C_i are m by n matrices, and A_i is a Hermitian matrix stored as either upper or lower.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] HIPBLAS_SIDE_LEFT: C_i := alpha*A_i*B_i + beta*C_i HIPBLAS_SIDE_RIGHT: C_i := alpha*B_i*A_i + beta*C_i |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: A_i is a lower triangular matrix |
| [in] | n | [int] n specifies the number of rows of B_i and C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of B_i and C_i. k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A_i and B_i are not referenced. |
| [in] | AP | device array of device pointers storing each matrix A_i on the GPU. A_i is m by m if side == HIPBLAS_SIDE_LEFT A_i is n by n if side == HIPBLAS_SIDE_RIGHT Only the upper/lower triangular part is accessed. The imaginary component of the diagonal elements is not used. |
| [in] | lda | [int] lda specifies the first dimension of A_i. if side = HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), otherwise lda >= max( 1, n ). |
| [in] | BP | device array of device pointers storing each matrix B_i on the GPU. Matrix dimension is m by n |
| [in] | ldb | [int] ldb specifies the first dimension of B_i. ldb >= max( 1, m ) |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C_i need not be set before entry. |
| [in] | CP | device array of device pointers storing each matrix C_i on the GPU. Matrix dimension is m by n |
| [in] | ldc | [int] ldc specifies the first dimension of C_i. ldc >= max( 1, m ) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasChemmStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| int | k, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const hipblasComplex * | BP, | ||
| int | ldb, | ||
| hipblasStride | strideB, | ||
| const hipblasComplex * | beta, | ||
| hipblasComplex * | CP, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
hemmStridedBatched performs a batch of the matrix-matrix operations:
C_i := alpha*A_i*B_i + beta*C_i if side == HIPBLAS_SIDE_LEFT, C_i := alpha*B_i*A_i + beta*C_i if side == HIPBLAS_SIDE_RIGHT,
where alpha and beta are scalars, B_i and C_i are m by n matrices, and A_i is a Hermitian matrix stored as either upper or lower.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] HIPBLAS_SIDE_LEFT: C_i := alpha*A_i*B_i + beta*C_i HIPBLAS_SIDE_RIGHT: C_i := alpha*B_i*A_i + beta*C_i |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: A_i is a lower triangular matrix |
| [in] | n | [int] n specifies the number of rows of B_i and C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of B_i and C_i. k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A_i and B_i are not referenced. |
| [in] | AP | device pointer to first matrix A_1 A_i is m by m if side == HIPBLAS_SIDE_LEFT A_i is n by n if side == HIPBLAS_SIDE_RIGHT Only the upper/lower triangular part is accessed. The imaginary component of the diagonal elements is not used. |
| [in] | lda | [int] lda specifies the first dimension of A_i. if side = HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), otherwise lda >= max( 1, n ). |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | BP | device pointer to first matrix B_1 of dimension (ldb, n) on the GPU |
| [in] | ldb | [int] ldb specifies the first dimension of B_i. if side = HIPBLAS_OP_N, ldb >= max( 1, m ), otherwise ldb >= max( 1, n ). |
| [in] | strideB | [hipblasStride] stride from the start of one matrix (B_i) and the next one (B_i+1) |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | device pointer to first matrix C_1 of dimension (ldc, n) on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, m ) |
| [in,out] | strideC | [hipblasStride] stride from the start of one matrix (C_i) and the next one (C_i+1) |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasChemv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| const hipblasComplex * | beta, | ||
| hipblasComplex * | y, | ||
| int | incy | ||
| ) |
BLAS Level 2 API.
hemv performs one of the matrix-vector operations
y := alpha*A*x + beta*y
where alpha and beta are scalars, x and y are n element vectors and A is an n by n Hermitian matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: the upper triangular part of the Hermitian matrix A is supplied. HIPBLAS_FILL_MODE_LOWER: the lower triangular part of the Hermitian matrix A is supplied. |
| [in] | n | [int] the order of the matrix A. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device pointer storing matrix A. Of dimension (lda, n). if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular part of A must contain the upper triangular part of a Hermitian matrix. The lower triangular part of A will not be referenced. if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular part of A must contain the lower triangular part of a Hermitian matrix. The upper triangular part of A will not be referenced. As a Hermitian matrix, the imaginary part of the main diagonal of A will not be referenced and is assumed to be == 0. |
| [in] | lda | [int] specifies the leading dimension of A. must be >= max(1, n) |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| hipblasStatus_t hipblasChemvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex *const | AP[], | ||
| int | lda, | ||
| const hipblasComplex *const | x[], | ||
| int | incx, | ||
| const hipblasComplex * | beta, | ||
| hipblasComplex *const | y[], | ||
| int | incy, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
hemvBatched performs one of the matrix-vector operations
y_i := alpha*A_i*x_i + beta*y_i
where alpha and beta are scalars, x_i and y_i are n element vectors and A_i is an n by n Hermitian matrix, for each batch in i = [1, batchCount].
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: the upper triangular part of the Hermitian matrix A is supplied. HIPBLAS_FILL_MODE_LOWER: the lower triangular part of the Hermitian matrix A is supplied. |
| [in] | n | [int] the order of each matrix A_i. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device array of device pointers storing each matrix A_i of dimension (lda, n). if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i must contain the upper triangular part of a Hermitian matrix. The lower triangular part of each A_i will not be referenced. if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i must contain the lower triangular part of a Hermitian matrix. The upper triangular part of each A_i will not be referenced. As a Hermitian matrix, the imaginary part of the main diagonal of each A_i will not be referenced and is assumed to be == 0. |
| [in] | lda | [int] specifies the leading dimension of each A_i. must be >= max(1, n) |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasChemvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const hipblasComplex * | beta, | ||
| hipblasComplex * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
hemvStridedBatched performs one of the matrix-vector operations
y_i := alpha*A_i*x_i + beta*y_i
where alpha and beta are scalars, x_i and y_i are n element vectors and A_i is an n by n Hermitian matrix, for each batch in i = [1, batchCount].
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: the upper triangular part of the Hermitian matrix A is supplied. HIPBLAS_FILL_MODE_LOWER: the lower triangular part of the Hermitian matrix A is supplied. |
| [in] | n | [int] the order of each matrix A_i. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device array of device pointers storing each matrix A_i of dimension (lda, n). if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i must contain the upper triangular part of a Hermitian matrix. The lower triangular part of each A_i will not be referenced. if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i must contain the lower triangular part of a Hermitian matrix. The upper triangular part of each A_i will not be referenced. As a Hermitian matrix, the imaginary part of the main diagonal of each A_i will not be referenced and is assumed to be == 0. |
| [in] | lda | [int] specifies the leading dimension of each A_i. must be >= max(1, n) |
| [in] | strideA | [hipblasStride] stride from the start of one (A_i) to the next (A_i+1) |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1). |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasCher | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const float * | alpha, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasComplex * | AP, | ||
| int | lda | ||
| ) |
BLAS Level 2 API.
her performs the matrix-vector operations
A := A + alpha*x*x**H
where alpha is a real scalar, x is a vector, and A is an n by n Hermitian matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of A is supplied in A. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of A is supplied in A. |
| [in] | n | [int] the number of rows and columns of matrix A, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in,out] | AP | device pointer storing the specified triangular portion of the Hermitian matrix A. Of size (lda * n). if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of the Hermitian matrix A is supplied. The lower triangluar portion will not be touched. if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of the Hermitian matrix A is supplied. The upper triangular portion will not be touched. Note that the imaginary part of the diagonal elements are not accessed and are assumed to be 0. |
| [in] | lda | [int] specifies the leading dimension of A. Must be at least max(1, n). |
| hipblasStatus_t hipblasCher2 | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| const hipblasComplex * | y, | ||
| int | incy, | ||
| hipblasComplex * | AP, | ||
| int | lda | ||
| ) |
BLAS Level 2 API.
her2 performs the matrix-vector operations
A := A + alpha*x*y**H + conj(alpha)*y*x**H
where alpha is a complex scalar, x and y are vectors, and A is an n by n Hermitian matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of A is supplied. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of A is supplied. |
| [in] | n | [int] the number of rows and columns of matrix A, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in,out] | AP | device pointer storing the specified triangular portion of the Hermitian matrix A. Of size (lda, n). if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of the Hermitian matrix A is supplied. The lower triangular portion of A will not be touched. if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of the Hermitian matrix A is supplied. The upper triangular portion of A will not be touched. Note that the imaginary part of the diagonal elements are not accessed and are assumed to be 0. |
| [in] | lda | [int] specifies the leading dimension of A. Must be at least max(lda, 1). |
| hipblasStatus_t hipblasCher2Batched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex *const | x[], | ||
| int | incx, | ||
| const hipblasComplex *const | y[], | ||
| int | incy, | ||
| hipblasComplex *const | AP[], | ||
| int | lda, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
her2Batched performs the matrix-vector operations
A_i := A_i + alpha*x_i*y_i**H + conj(alpha)*y_i*x_i**H
where alpha is a complex scalar, x_i and y_i are vectors, and A_i is an n by n Hermitian matrix for each batch in i = [1, batchCount].
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied. |
| [in] | n | [int] the number of rows and columns of each matrix A_i, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in,out] | AP | device array of device pointers storing the specified triangular portion of each Hermitian matrix A_i of size (lda, n). if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of each Hermitian matrix A_i is supplied. The lower triangular portion of each A_i will not be touched. if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of each Hermitian matrix A_i is supplied. The upper triangular portion of each A_i will not be touched. Note that the imaginary part of the diagonal elements are not accessed and are assumed to be 0. |
| [in] | lda | [int] specifies the leading dimension of each A_i. Must be at least max(lda, 1). |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasCher2k | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| const hipblasComplex * | BP, | ||
| int | ldb, | ||
| const float * | beta, | ||
| hipblasComplex * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
her2k performs one of the matrix-matrix operations for a Hermitian rank-2k update
C := alpha*op( A )*op( B )^H + conj(alpha)*op( B )*op( A )^H + beta*C
where alpha and beta are scalars, op(A) and op(B) are n by k matrices, and C is a n x n Hermitian matrix stored as either upper or lower.
op( A ) = A, op( B ) = B, and A and B are n by k if trans == HIPBLAS_OP_N op( A ) = A^H, op( B ) = B^H, and A and B are k by n if trans == HIPBLAS_OP_C
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_C: op( A ) = A^H, op( B ) = B^H HIPBLAS_OP_N: op( A ) = A, op( B ) = B |
| [in] | n | [int] n specifies the number of rows and columns of C. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | pointer storing matrix A on the GPU. Martrix dimension is ( lda, k ) when if trans = HIPBLAS_OP_N, otherwise (lda, n) only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of A. if trans = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | BP | pointer storing matrix B on the GPU. Martrix dimension is ( ldb, k ) when if trans = HIPBLAS_OP_N, otherwise (ldb, n) only the upper/lower triangular part is accessed. |
| [in] | ldb | [int] ldb specifies the first dimension of B. if trans = HIPBLAS_OP_N, ldb >= max( 1, n ), otherwise ldb >= max( 1, k ). |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | pointer storing matrix C on the GPU. The imaginary component of the diagonal elements are not used but are set to zero unless quick return. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| hipblasStatus_t hipblasCher2kBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex *const | AP[], | ||
| int | lda, | ||
| const hipblasComplex *const | BP[], | ||
| int | ldb, | ||
| const float * | beta, | ||
| hipblasComplex *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
her2kBatched performs a batch of the matrix-matrix operations for a Hermitian rank-2k update
C_i := alpha*op( A_i )*op( B_i )^H + conj(alpha)*op( B_i )*op( A_i )^H + beta*C_i
where alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrices, and C_i is a n x n Hermitian matrix stored as either upper or lower.
op( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == HIPBLAS_OP_N op( A_i ) = A_i^H, op( B_i ) = B_i^H, and A_i and B_i are k by n if trans == HIPBLAS_OP_C
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C_i is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_C: op(A) = A^H HIPBLAS_OP_N: op(A) = A |
| [in] | n | [int] n specifies the number of rows and columns of C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | device array of device pointers storing each matrix_i A of dimension (lda, k) when trans is HIPBLAS_OP_N, otherwise of dimension (lda, n) |
| [in] | lda | [int] lda specifies the first dimension of A_i. if trans = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | BP | device array of device pointers storing each matrix_i B of dimension (ldb, k) when trans is HIPBLAS_OP_N, otherwise of dimension (ldb, n) |
| [in] | ldb | [int] ldb specifies the first dimension of B_i. if trans = HIPBLAS_OP_N, ldb >= max( 1, n ), otherwise ldb >= max( 1, k ). |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | device array of device pointers storing each matrix C_i on the GPU. The imaginary component of the diagonal elements are not used but are set to zero unless quick return. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasCher2kStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const hipblasComplex * | BP, | ||
| int | ldb, | ||
| hipblasStride | strideB, | ||
| const float * | beta, | ||
| hipblasComplex * | CP, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
her2kStridedBatched performs a batch of the matrix-matrix operations for a Hermitian rank-2k update
C_i := alpha*op( A_i )*op( B_i )^H + conj(alpha)*op( B_i )*op( A_i )^H + beta*C_i
where alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrices, and C_i is a n x n Hermitian matrix stored as either upper or lower.
op( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == HIPBLAS_OP_N op( A_i ) = A_i^H, op( B_i ) = B_i^H, and A_i and B_i are k by n if trans == HIPBLAS_OP_C
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C_i is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_C: op( A_i ) = A_i^H, op( B_i ) = B_i^H HIPBLAS_OP_N: op( A_i ) = A_i, op( B_i ) = B_i |
| [in] | n | [int] n specifies the number of rows and columns of C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | Device pointer to the first matrix A_1 on the GPU of dimension (lda, k) when trans is HIPBLAS_OP_N, otherwise of dimension (lda, n) |
| [in] | lda | [int] lda specifies the first dimension of A_i. if trans = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | BP | Device pointer to the first matrix B_1 on the GPU of dimension (ldb, k) when trans is HIPBLAS_OP_N, otherwise of dimension (ldb, n) |
| [in] | ldb | [int] ldb specifies the first dimension of B_i. if trans = HIPBLAS_OP_N, ldb >= max( 1, n ), otherwise ldb >= max( 1, k ). |
| [in] | strideB | [hipblasStride] stride from the start of one matrix (B_i) and the next one (B_i+1) |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | Device pointer to the first matrix C_1 on the GPU. The imaginary component of the diagonal elements are not used but are set to zero unless quick return. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| [in,out] | strideC | [hipblasStride] stride from the start of one matrix (C_i) and the next one (C_i+1) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasCher2StridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const hipblasComplex * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| hipblasComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
her2StridedBatched performs the matrix-vector operations
A_i := A_i + alpha*x_i*y_i**H + conj(alpha)*y_i*x_i**H
where alpha is a complex scalar, x_i and y_i are vectors, and A_i is an n by n Hermitian matrix for each batch in i = [1, batchCount].
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied. |
| [in] | n | [int] the number of rows and columns of each matrix A_i, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer pointing to the first vector x_1. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] specifies the stride between the beginning of one vector (x_i) and the next (x_i+1). |
| [in] | y | device pointer pointing to the first vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | stridey | [hipblasStride] specifies the stride between the beginning of one vector (y_i) and the next (y_i+1). |
| [in,out] | AP | device pointer pointing to the first matrix (A_1). Stores the specified triangular portion of each Hermitian matrix A_i. if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of each Hermitian matrix A_i is supplied. The lower triangular portion of each A_i will not be touched. if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of each Hermitian matrix A_i is supplied. The upper triangular portion of each A_i will not be touched. Note that the imaginary part of the diagonal elements are not accessed and are assumed to be 0. |
| [in] | lda | [int] specifies the leading dimension of each A_i. Must be at least max(lda, 1). |
| [in] | strideA | [hipblasStride] specifies the stride between the beginning of one matrix (A_i) and the next (A_i+1). |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasCherBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const float * | alpha, | ||
| const hipblasComplex *const | x[], | ||
| int | incx, | ||
| hipblasComplex *const | AP[], | ||
| int | lda, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
herBatched performs the matrix-vector operations
A_i := A_i + alpha*x_i*x_i**H
where alpha is a real scalar, x_i is a vector, and A_i is an n by n symmetric matrix, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied in A. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied in A. |
| [in] | n | [int] the number of rows and columns of each matrix A_i, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in,out] | AP | device array of device pointers storing the specified triangular portion of each Hermitian matrix A_i of at least size ((n * (n + 1)) / 2). Array is of at least size batchCount. if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of each Hermitian matrix A_i is supplied. The lower triangular portion of each A_i will not be touched. if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of each Hermitian matrix A_i is supplied. The upper triangular portion of each A_i will not be touched. Note that the imaginary part of the diagonal elements are not accessed and are assumed to be 0. |
| [in] | lda | [int] specifies the leading dimension of each A_i. Must be at least max(1, n). |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasCherk | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const float * | alpha, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| const float * | beta, | ||
| hipblasComplex * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
herk performs one of the matrix-matrix operations for a Hermitian rank-k update
C := alpha*op( A )*op( A )^H + beta*C
where alpha and beta are scalars, op(A) is an n by k matrix, and C is a n x n Hermitian matrix stored as either upper or lower.
op( A ) = A, and A is n by k if transA == HIPBLAS_OP_N op( A ) = A^H and A is k by n if transA == HIPBLAS_OP_C
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_C: op(A) = A^H HIPBLAS_ON_N: op(A) = A |
| [in] | n | [int] n specifies the number of rows and columns of C. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | pointer storing matrix A on the GPU. Martrix dimension is ( lda, k ) when if transA = HIPBLAS_OP_N, otherwise (lda, n) only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of A. if transA = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | pointer storing matrix C on the GPU. The imaginary component of the diagonal elements are not used but are set to zero unless quick return. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| hipblasStatus_t hipblasCherkBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const float * | alpha, | ||
| const hipblasComplex *const | AP[], | ||
| int | lda, | ||
| const float * | beta, | ||
| hipblasComplex *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
herkBatched performs a batch of the matrix-matrix operations for a Hermitian rank-k update
C_i := alpha*op( A_i )*op( A_i )^H + beta*C_i
where alpha and beta are scalars, op(A) is an n by k matrix, and C_i is a n x n Hermitian matrix stored as either upper or lower.
op( A_i ) = A_i, and A_i is n by k if transA == HIPBLAS_OP_N op( A_i ) = A_i^H and A_i is k by n if transA == HIPBLAS_OP_C
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C_i is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_C: op(A) = A^H HIPBLAS_OP_N: op(A) = A |
| [in] | n | [int] n specifies the number of rows and columns of C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | device array of device pointers storing each matrix_i A of dimension (lda, k) when transA is HIPBLAS_OP_N, otherwise of dimension (lda, n) |
| [in] | lda | [int] lda specifies the first dimension of A_i. if transA = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | device array of device pointers storing each matrix C_i on the GPU. The imaginary component of the diagonal elements are not used but are set to zero unless quick return. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasCherkStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const float * | alpha, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const float * | beta, | ||
| hipblasComplex * | CP, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
herkStridedBatched performs a batch of the matrix-matrix operations for a Hermitian rank-k update
C_i := alpha*op( A_i )*op( A_i )^H + beta*C_i
where alpha and beta are scalars, op(A) is an n by k matrix, and C_i is a n x n Hermitian matrix stored as either upper or lower.
op( A_i ) = A_i, and A_i is n by k if transA == HIPBLAS_OP_N op( A_i ) = A_i^H and A_i is k by n if transA == HIPBLAS_OP_C
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C_i is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_C: op(A) = A^H HIPBLAS_OP_N: op(A) = A |
| [in] | n | [int] n specifies the number of rows and columns of C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | Device pointer to the first matrix A_1 on the GPU of dimension (lda, k) when transA is HIPBLAS_OP_N, otherwise of dimension (lda, n) |
| [in] | lda | [int] lda specifies the first dimension of A_i. if transA = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | Device pointer to the first matrix C_1 on the GPU. The imaginary component of the diagonal elements are not used but are set to zero unless quick return. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| [in,out] | strideC | [hipblasStride] stride from the start of one matrix (C_i) and the next one (C_i+1) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasCherkx | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| const hipblasComplex * | BP, | ||
| int | ldb, | ||
| const float * | beta, | ||
| hipblasComplex * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
herkx performs one of the matrix-matrix operations for a Hermitian rank-k update
C := alpha*op( A )*op( B )^H + beta*C
where alpha and beta are scalars, op(A) and op(B) are n by k matrices, and C is a n x n Hermitian matrix stored as either upper or lower. This routine should only be used when the caller can guarantee that the result of op( A )*op( B )^T will be Hermitian.
op( A ) = A, op( B ) = B, and A and B are n by k if trans == HIPBLAS_OP_N op( A ) = A^H, op( B ) = B^H, and A and B are k by n if trans == HIPBLAS_OP_C
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_C: op( A ) = A^H, op( B ) = B^H HIPBLAS_OP_N: op( A ) = A, op( B ) = B |
| [in] | n | [int] n specifies the number of rows and columns of C. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | pointer storing matrix A on the GPU. Martrix dimension is ( lda, k ) when if trans = HIPBLAS_OP_N, otherwise (lda, n) only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of A. if trans = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | BP | pointer storing matrix B on the GPU. Martrix dimension is ( ldb, k ) when if trans = HIPBLAS_OP_N, otherwise (ldb, n) only the upper/lower triangular part is accessed. |
| [in] | ldb | [int] ldb specifies the first dimension of B. if trans = HIPBLAS_OP_N, ldb >= max( 1, n ), otherwise ldb >= max( 1, k ). |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | pointer storing matrix C on the GPU. The imaginary component of the diagonal elements are not used but are set to zero unless quick return. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| hipblasStatus_t hipblasCherkxBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex *const | AP[], | ||
| int | lda, | ||
| const hipblasComplex *const | BP[], | ||
| int | ldb, | ||
| const float * | beta, | ||
| hipblasComplex *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
herkxBatched performs a batch of the matrix-matrix operations for a Hermitian rank-k update
C_i := alpha*op( A_i )*op( B_i )^H + beta*C_i
where alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrices, and C_i is a n x n Hermitian matrix stored as either upper or lower. This routine should only be used when the caller can guarantee that the result of op( A )*op( B )^T will be Hermitian.
op( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == HIPBLAS_OP_N op( A_i ) = A_i^H, op( B_i ) = B_i^H, and A_i and B_i are k by n if trans == HIPBLAS_OP_C
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C_i is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_C: op(A) = A^H HIPBLAS_OP_N: op(A) = A |
| [in] | n | [int] n specifies the number of rows and columns of C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | device array of device pointers storing each matrix_i A of dimension (lda, k) when trans is HIPBLAS_OP_N, otherwise of dimension (lda, n) |
| [in] | lda | [int] lda specifies the first dimension of A_i. if trans = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | BP | device array of device pointers storing each matrix_i B of dimension (ldb, k) when trans is HIPBLAS_OP_N, otherwise of dimension (ldb, n) |
| [in] | ldb | [int] ldb specifies the first dimension of B_i. if trans = HIPBLAS_OP_N, ldb >= max( 1, n ), otherwise ldb >= max( 1, k ). |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | device array of device pointers storing each matrix C_i on the GPU. The imaginary component of the diagonal elements are not used but are set to zero unless quick return. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasCherkxStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const hipblasComplex * | BP, | ||
| int | ldb, | ||
| hipblasStride | strideB, | ||
| const float * | beta, | ||
| hipblasComplex * | CP, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
herkxStridedBatched performs a batch of the matrix-matrix operations for a Hermitian rank-k update
C_i := alpha*op( A_i )*op( B_i )^H + beta*C_i
where alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrices, and C_i is a n x n Hermitian matrix stored as either upper or lower. This routine should only be used when the caller can guarantee that the result of op( A )*op( B )^T will be Hermitian.
op( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == HIPBLAS_OP_N op( A_i ) = A_i^H, op( B_i ) = B_i^H, and A_i and B_i are k by n if trans == HIPBLAS_OP_C
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C_i is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_C: op( A_i ) = A_i^H, op( B_i ) = B_i^H HIPBLAS_OP_N: op( A_i ) = A_i, op( B_i ) = B_i |
| [in] | n | [int] n specifies the number of rows and columns of C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | Device pointer to the first matrix A_1 on the GPU of dimension (lda, k) when trans is HIPBLAS_OP_N, otherwise of dimension (lda, n) |
| [in] | lda | [int] lda specifies the first dimension of A_i. if trans = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | BP | Device pointer to the first matrix B_1 on the GPU of dimension (ldb, k) when trans is HIPBLAS_OP_N, otherwise of dimension (ldb, n) |
| [in] | ldb | [int] ldb specifies the first dimension of B_i. if trans = HIPBLAS_OP_N, ldb >= max( 1, n ), otherwise ldb >= max( 1, k ). |
| [in] | strideB | [hipblasStride] stride from the start of one matrix (B_i) and the next one (B_i+1) |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | Device pointer to the first matrix C_1 on the GPU. The imaginary component of the diagonal elements are not used but are set to zero unless quick return. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| [in,out] | strideC | [hipblasStride] stride from the start of one matrix (C_i) and the next one (C_i+1) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasCherStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const float * | alpha, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| hipblasComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
herStridedBatched performs the matrix-vector operations
A_i := A_i + alpha*x_i*x_i**H
where alpha is a real scalar, x_i is a vector, and A_i is an n by n Hermitian matrix, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied in A. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied in A. |
| [in] | n | [int] the number of rows and columns of each matrix A_i, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer pointing to the first vector (x_1). |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). |
| [in,out] | AP | device array of device pointers storing the specified triangular portion of each Hermitian matrix A_i. Points to the first matrix (A_1). if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of each Hermitian matrix A_i is supplied. The lower triangular portion of each A_i will not be touched. if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of each Hermitian matrix A_i is supplied. The upper triangular portion of each A_i will not be touched. Note that the imaginary part of the diagonal elements are not accessed and are assumed to be 0. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | strideA | [hipblasStride] stride from the start of one (A_i) and the next (A_i+1) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasChpmv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | AP, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| const hipblasComplex * | beta, | ||
| hipblasComplex * | y, | ||
| int | incy | ||
| ) |
BLAS Level 2 API.
hpmv performs the matrix-vector operation
y := alpha*A*x + beta*y
where alpha and beta are scalars, x and y are n element vectors and A is an n by n Hermitian matrix, supplied in packed form (see description below).
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: the upper triangular part of the Hermitian matrix A is supplied in AP. HIPBLAS_FILL_MODE_LOWER: the lower triangular part of the Hermitian matrix A is supplied in AP. |
| [in] | n | [int] the order of the matrix A, must be >= 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device pointer storing the packed version of the specified triangular portion of the Hermitian matrix A. Of at least size ((n * (n + 1)) / 2). if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of the Hermitian matrix A is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(0,1) AP(2) = A(1,1), etc. Ex: (HIPBLAS_FILL_MODE_UPPER; n = 3) (1, 0) (2, 1) (3, 2) (2,-1) (4, 0) (5,-1) --—> [(1,0), (2,1), (4,0), (3,2), (5,-1), (6,0)] (3,-2) (5, 1) (6, 0) if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of the Hermitian matrix A is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(1,0) AP(2) = A(2,1), etc. Ex: (HIPBLAS_FILL_MODE_LOWER; n = 3) (1, 0) (2, 1) (3, 2) (2,-1) (4, 0) (5,-1) --—> [(1,0), (2,-1), (3,-2), (4,0), (5,1), (6,0)] (3,-2) (5, 1) (6, 0) Note that the imaginary part of the diagonal elements are not accessed and are assumed to be 0. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| hipblasStatus_t hipblasChpmvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex *const | AP[], | ||
| const hipblasComplex *const | x[], | ||
| int | incx, | ||
| const hipblasComplex * | beta, | ||
| hipblasComplex *const | y[], | ||
| int | incy, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
hpmvBatched performs the matrix-vector operation
y_i := alpha*A_i*x_i + beta*y_i
where alpha and beta are scalars, x_i and y_i are n element vectors and A_i is an n by n Hermitian matrix, supplied in packed form (see description below), for each batch in i = [1, batchCount].
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: the upper triangular part of each Hermitian matrix A_i is supplied in AP. HIPBLAS_FILL_MODE_LOWER: the lower triangular part of each Hermitian matrix A_i is supplied in AP. |
| [in] | n | [int] the order of each matrix A_i. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device pointer of device pointers storing the packed version of the specified triangular portion of each Hermitian matrix A_i. Each A_i is of at least size ((n * (n + 1)) / 2). if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of each Hermitian matrix A_i is supplied. The matrix is compacted so that each AP_i contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(0,1) AP(2) = A(1,1), etc. Ex: (HIPBLAS_FILL_MODE_UPPER; n = 3) (1, 0) (2, 1) (3, 2) (2,-1) (4, 0) (5,-1) --—> [(1,0), (2,1), (4,0), (3,2), (5,-1), (6,0)] (3,-2) (5, 1) (6, 0) if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of each Hermitian matrix A_i is supplied. The matrix is compacted so that each AP_i contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(1,0) AP(2) = A(2,1), etc. Ex: (HIPBLAS_FILL_MODE_LOWER; n = 3) (1, 0) (2, 1) (3, 2) (2,-1) (4, 0) (5,-1) --—> [(1,0), (2,-1), (3,-2), (4,0), (5,1), (6,0)] (3,-2) (5, 1) (6, 0) Note that the imaginary part of the diagonal elements are not accessed and are assumed to be 0. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasChpmvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | AP, | ||
| hipblasStride | strideA, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const hipblasComplex * | beta, | ||
| hipblasComplex * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
hpmvStridedBatched performs the matrix-vector operation
y_i := alpha*A_i*x_i + beta*y_i
where alpha and beta are scalars, x_i and y_i are n element vectors and A_i is an n by n Hermitian matrix, supplied in packed form (see description below), for each batch in i = [1, batchCount].
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: the upper triangular part of each Hermitian matrix A_i is supplied in AP. HIPBLAS_FILL_MODE_LOWER: the lower triangular part of each Hermitian matrix A_i is supplied in AP. |
| [in] | n | [int] the order of each matrix A_i. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device pointer pointing to the beginning of the first matrix (AP_1). Stores the packed version of the specified triangular portion of each Hermitian matrix AP_i of size ((n * (n + 1)) / 2). if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of each Hermitian matrix A_i is supplied. The matrix is compacted so that each AP_i contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(0,1) AP(2) = A(1,1), etc. Ex: (HIPBLAS_FILL_MODE_UPPER; n = 3) (1, 0) (2, 1) (3, 2) (2,-1) (4, 0) (5,-1) --—> [(1,0), (2,1), (4,0), (3,2), (5,-1), (6,0)] (3,-2) (5, 1) (6, 0) if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of each Hermitian matrix A_i is supplied. The matrix is compacted so that each AP_i contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(1,0) AP(2) = A(2,1), etc. Ex: (HIPBLAS_FILL_MODE_LOWER; n = 3) (1, 0) (2, 1) (3, 2) (2,-1) (4, 0) (5,-1) --—> [(1,0), (2,-1), (3,-2), (4,0), (5,1), (6,0)] (3,-2) (5, 1) (6, 0) Note that the imaginary part of the diagonal elements are not accessed and are assumed to be 0. |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (AP_i) and the next one (AP_i+1). |
| [in] | x | device array pointing to the beginning of the first vector (x_1). |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device array pointing to the beginning of the first vector (y_1). |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1). |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasChpr | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const float * | alpha, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasComplex * | AP | ||
| ) |
BLAS Level 2 API.
hpr performs the matrix-vector operations
A := A + alpha*x*x**H
where alpha is a real scalar, x is a vector, and A is an n by n Hermitian matrix, supplied in packed form.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of A is supplied in AP. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of A is supplied in AP. |
| [in] | n | [int] the number of rows and columns of matrix A, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in,out] | AP | device pointer storing the packed version of the specified triangular portion of the Hermitian matrix A. Of at least size ((n * (n + 1)) / 2). if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of the Hermitian matrix A is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(0,1) AP(2) = A(1,1), etc. Ex: (HIPBLAS_FILL_MODE_UPPER; n = 3) (1, 0) (2, 1) (4,9) (2,-1) (3, 0) (5,3) --—> [(1,0), (2,1), (3,0), (4,9), (5,3), (6,0)] (4,-9) (5,-3) (6,0) if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of the Hermitian matrix A is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(1,0) AP(2) = A(2,1), etc. Ex: (HIPBLAS_FILL_MODE_LOWER; n = 3) (1, 0) (2, 1) (4,9) (2,-1) (3, 0) (5,3) --—> [(1,0), (2,-1), (4,-9), (3,0), (5,-3), (6,0)] (4,-9) (5,-3) (6,0) Note that the imaginary part of the diagonal elements are not accessed and are assumed to be 0. |
| hipblasStatus_t hipblasChpr2 | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| const hipblasComplex * | y, | ||
| int | incy, | ||
| hipblasComplex * | AP | ||
| ) |
BLAS Level 2 API.
hpr2 performs the matrix-vector operations
A := A + alpha*x*y**H + conj(alpha)*y*x**H
where alpha is a complex scalar, x and y are vectors, and A is an n by n Hermitian matrix, supplied in packed form.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of A is supplied in AP. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of A is supplied in AP. |
| [in] | n | [int] the number of rows and columns of matrix A, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in,out] | AP | device pointer storing the packed version of the specified triangular portion of the Hermitian matrix A. Of at least size ((n * (n + 1)) / 2). if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of the Hermitian matrix A is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(0,1) AP(2) = A(1,1), etc. Ex: (HIPBLAS_FILL_MODE_UPPER; n = 3) (1, 0) (2, 1) (4,9) (2,-1) (3, 0) (5,3) --—> [(1,0), (2,1), (3,0), (4,9), (5,3), (6,0)] (4,-9) (5,-3) (6,0) if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of the Hermitian matrix A is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(1,0) AP(2) = A(2,1), etc. Ex: (HIPBLAS_FILL_MODE_LOWER; n = 3) (1, 0) (2, 1) (4,9) (2,-1) (3, 0) (5,3) --—> [(1,0), (2,-1), (4,-9), (3,0), (5,-3), (6,0)] (4,-9) (5,-3) (6,0) Note that the imaginary part of the diagonal elements are not accessed and are assumed to be 0. |
| hipblasStatus_t hipblasChpr2Batched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex *const | x[], | ||
| int | incx, | ||
| const hipblasComplex *const | y[], | ||
| int | incy, | ||
| hipblasComplex *const | AP[], | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
hpr2Batched performs the matrix-vector operations
A_i := A_i + alpha*x_i*y_i**H + conj(alpha)*y_i*x_i**H
where alpha is a complex scalar, x_i and y_i are vectors, and A_i is an n by n symmetric matrix, supplied in packed form, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied in AP. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied in AP. |
| [in] | n | [int] the number of rows and columns of each matrix A_i, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in,out] | AP | device array of device pointers storing the packed version of the specified triangular portion of each Hermitian matrix A_i of at least size ((n * (n + 1)) / 2). Array is of at least size batchCount. if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of each Hermitian matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(0,1) AP(2) = A(1,1), etc. Ex: (HIPBLAS_FILL_MODE_UPPER; n = 3) (1, 0) (2, 1) (4,9) (2,-1) (3, 0) (5,3) --—> [(1,0), (2,1), (3,0), (4,9), (5,3), (6,0)] (4,-9) (5,-3) (6,0) if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of each Hermitian matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(1,0) AP(2) = A(2,1), etc. Ex: (HIPBLAS_FILL_MODE_LOWER; n = 3) (1, 0) (2, 1) (4,9) (2,-1) (3, 0) (5,3) --—> [(1,0), (2,-1), (4,-9), (3,0), (5,-3), (6,0)] (4,-9) (5,-3) (6,0) Note that the imaginary part of the diagonal elements are not accessed and are assumed to be 0. |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasChpr2StridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const hipblasComplex * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| hipblasComplex * | AP, | ||
| hipblasStride | strideA, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
hpr2StridedBatched performs the matrix-vector operations
A_i := A_i + alpha*x_i*y_i**H + conj(alpha)*y_i*x_i**H
where alpha is a complex scalar, x_i and y_i are vectors, and A_i is an n by n symmetric matrix, supplied in packed form, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied in AP. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied in AP. |
| [in] | n | [int] the number of rows and columns of each matrix A_i, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer pointing to the first vector (x_1). |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). |
| [in] | y | device pointer pointing to the first vector (y_1). |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1). |
| [in,out] | AP | device array of device pointers storing the packed version of the specified triangular portion of each Hermitian matrix A_i. Points to the first matrix (A_1). if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of each Hermitian matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(0,1) AP(2) = A(1,1), etc. Ex: (HIPBLAS_FILL_MODE_UPPER; n = 3) (1, 0) (2, 1) (4,9) (2,-1) (3, 0) (5,3) --—> [(1,0), (2,1), (3,0), (4,9), (5,3), (6,0)] (4,-9) (5,-3) (6,0) if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of each Hermitian matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(1,0) AP(2) = A(2,1), etc. Ex: (HIPBLAS_FILL_MODE_LOWER; n = 3) (1, 0) (2, 1) (4,9) (2,-1) (3, 0) (5,3) --—> [(1,0), (2,-1), (4,-9), (3,0), (5,-3), (6,0)] (4,-9) (5,-3) (6,0) Note that the imaginary part of the diagonal elements are not accessed and are assumed to be 0. |
| [in] | strideA | [hipblasStride] stride from the start of one (A_i) and the next (A_i+1) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasChprBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const float * | alpha, | ||
| const hipblasComplex *const | x[], | ||
| int | incx, | ||
| hipblasComplex *const | AP[], | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
hprBatched performs the matrix-vector operations
A_i := A_i + alpha*x_i*x_i**H
where alpha is a real scalar, x_i is a vector, and A_i is an n by n symmetric matrix, supplied in packed form, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied in AP. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied in AP. |
| [in] | n | [int] the number of rows and columns of each matrix A_i, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in,out] | AP | device array of device pointers storing the packed version of the specified triangular portion of each Hermitian matrix A_i of at least size ((n * (n + 1)) / 2). Array is of at least size batchCount. if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of each Hermitian matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(0,1) AP(2) = A(1,1), etc. Ex: (HIPBLAS_FILL_MODE_UPPER; n = 3) (1, 0) (2, 1) (4,9) (2,-1) (3, 0) (5,3) --—> [(1,0), (2,1), (3,0), (4,9), (5,3), (6,0)] (4,-9) (5,-3) (6,0) if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of each Hermitian matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(1,0) AP(2) = A(2,1), etc. Ex: (HIPBLAS_FILL_MODE_LOWER; n = 3) (1, 0) (2, 1) (4,9) (2,-1) (3, 0) (5,3) --—> [(1,0), (2,-1), (4,-9), (3,0), (5,-3), (6,0)] (4,-9) (5,-3) (6,0) Note that the imaginary part of the diagonal elements are not accessed and are assumed to be 0. |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasChprStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const float * | alpha, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| hipblasComplex * | AP, | ||
| hipblasStride | strideA, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
hprStridedBatched performs the matrix-vector operations
A_i := A_i + alpha*x_i*x_i**H
where alpha is a real scalar, x_i is a vector, and A_i is an n by n symmetric matrix, supplied in packed form, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied in AP. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied in AP. |
| [in] | n | [int] the number of rows and columns of each matrix A_i, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer pointing to the first vector (x_1). |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). |
| [in,out] | AP | device array of device pointers storing the packed version of the specified triangular portion of each Hermitian matrix A_i. Points to the first matrix (A_1). if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of each Hermitian matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(0,1) AP(2) = A(1,1), etc. Ex: (HIPBLAS_FILL_MODE_UPPER; n = 3) (1, 0) (2, 1) (4,9) (2,-1) (3, 0) (5,3) --—> [(1,0), (2,1), (3,0), (4,9), (5,3), (6,0)] (4,-9) (5,-3) (6,0) if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of each Hermitian matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(1,0) AP(2) = A(2,1), etc. Ex: (HIPBLAS_FILL_MODE_LOWER; n = 3) (1, 0) (2, 1) (4,9) (2,-1) (3, 0) (5,3) --—> [(1,0), (2,-1), (4,-9), (3,0), (5,-3), (6,0)] (4,-9) (5,-3) (6,0) Note that the imaginary part of the diagonal elements are not accessed and are assumed to be 0. |
| [in] | strideA | [hipblasStride] stride from the start of one (A_i) and the next (A_i+1) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasCreate | ( | hipblasHandle_t * | handle | ) |
Create hipblas handle.
| hipblasStatus_t hipblasCrot | ( | hipblasHandle_t | handle, |
| int | n, | ||
| hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasComplex * | y, | ||
| int | incy, | ||
| const float * | c, | ||
| const hipblasComplex * | s | ||
| ) |
BLAS Level 1 API.
rot applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to vectors x and y. Scalars c and s may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in the x and y vectors. |
| [in,out] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment between elements of x. |
| [in,out] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment between elements of y. |
| [in] | c | device pointer or host pointer storing scalar cosine component of the rotation matrix. |
| [in] | s | device pointer or host pointer storing scalar sine component of the rotation matrix. |
| hipblasStatus_t hipblasCrotBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| hipblasComplex *const | x[], | ||
| int | incx, | ||
| hipblasComplex *const | y[], | ||
| int | incy, | ||
| const float * | c, | ||
| const hipblasComplex * | s, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
rotBatched applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to batched vectors x_i and y_i, for i = 1, ..., batchCount. Scalars c and s may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each x_i and y_i vectors. |
| [in,out] | x | device array of deivce pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment between elements of each x_i. |
| [in,out] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment between elements of each y_i. |
| [in] | c | device pointer or host pointer to scalar cosine component of the rotation matrix. |
| [in] | s | device pointer or host pointer to scalar sine component of the rotation matrix. |
| [in] | batchCount | [int] the number of x and y arrays, i.e. the number of batches. |
| hipblasStatus_t hipblasCrotg | ( | hipblasHandle_t | handle, |
| hipblasComplex * | a, | ||
| hipblasComplex * | b, | ||
| float * | c, | ||
| hipblasComplex * | s | ||
| ) |
BLAS Level 1 API.
rotg creates the Givens rotation matrix for the vector (a b). Scalars c and s and arrays a and b may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode. If the pointer mode is set to HIPBLAS_POINTER_MODE_HOST, this function blocks the CPU until the GPU has finished and the results are available in host memory. If the pointer mode is set to HIPBLAS_POINTER_MODE_DEVICE, this function returns immediately and synchronization is required to read the results.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in,out] | a | device pointer or host pointer to input vector element, overwritten with r. |
| [in,out] | b | device pointer or host pointer to input vector element, overwritten with z. |
| [in,out] | c | device pointer or host pointer to cosine element of Givens rotation. |
| [in,out] | s | device pointer or host pointer sine element of Givens rotation. |
| hipblasStatus_t hipblasCrotgBatched | ( | hipblasHandle_t | handle, |
| hipblasComplex *const | a[], | ||
| hipblasComplex *const | b[], | ||
| float *const | c[], | ||
| hipblasComplex *const | s[], | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
rotgBatched creates the Givens rotation matrix for the batched vectors (a_i b_i), for i = 1, ..., batchCount. a, b, c, and s may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode. If the pointer mode is set to HIPBLAS_POINTER_MODE_HOST, this function blocks the CPU until the GPU has finished and the results are available in host memory. If the pointer mode is set to HIPBLAS_POINTER_MODE_DEVICE, this function returns immediately and synchronization is required to read the results.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in,out] | a | device array of device pointers storing each single input vector element a_i, overwritten with r_i. |
| [in,out] | b | device array of device pointers storing each single input vector element b_i, overwritten with z_i. |
| [in,out] | c | device array of device pointers storing each cosine element of Givens rotation for the batch. |
| [in,out] | s | device array of device pointers storing each sine element of Givens rotation for the batch. |
| [in] | batchCount | [int] number of batches (length of arrays a, b, c, and s). |
| hipblasStatus_t hipblasCrotgStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasComplex * | a, | ||
| hipblasStride | stridea, | ||
| hipblasComplex * | b, | ||
| hipblasStride | strideb, | ||
| float * | c, | ||
| hipblasStride | stridec, | ||
| hipblasComplex * | s, | ||
| hipblasStride | strides, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
rotgStridedBatched creates the Givens rotation matrix for the strided batched vectors (a_i b_i), for i = 1, ..., batchCount. a, b, c, and s may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode. If the pointer mode is set to HIPBLAS_POINTER_MODE_HOST, this function blocks the CPU until the GPU has finished and the results are available in host memory. If the pointer mode is set to HIPBLAS_POINTER_MODE_HOST, this function returns immediately and synchronization is required to read the results.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in,out] | a | device strided_batched pointer or host strided_batched pointer to first single input vector element a_1, overwritten with r. |
| [in] | stridea | [hipblasStride] distance between elements of a in batch (distance between a_i and a_(i + 1)) |
| [in,out] | b | device strided_batched pointer or host strided_batched pointer to first single input vector element b_1, overwritten with z. |
| [in] | strideb | [hipblasStride] distance between elements of b in batch (distance between b_i and b_(i + 1)) |
| [in,out] | c | device strided_batched pointer or host strided_batched pointer to first cosine element of Givens rotations c_1. |
| [in] | stridec | [hipblasStride] distance between elements of c in batch (distance between c_i and c_(i + 1)) |
| [in,out] | s | device strided_batched pointer or host strided_batched pointer to sine element of Givens rotations s_1. |
| [in] | strides | [hipblasStride] distance between elements of s in batch (distance between s_i and s_(i + 1)) |
| [in] | batchCount | [int] number of batches (length of arrays a, b, c, and s). |
| hipblasStatus_t hipblasCrotStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| hipblasComplex * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| const float * | c, | ||
| const hipblasComplex * | s, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
rotStridedBatched applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to strided batched vectors x_i and y_i, for i = 1, ..., batchCount. Scalars c and s may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each x_i and y_i vectors. |
| [in,out] | x | device pointer to the first vector x_1. |
| [in] | incx | [int] specifies the increment between elements of each x_i. |
| [in] | stridex | [hipblasStride] specifies the increment from the beginning of x_i to the beginning of x_(i+1) |
| [in,out] | y | device pointer to the first vector y_1. |
| [in] | incy | [int] specifies the increment between elements of each y_i. |
| [in] | stridey | [hipblasStride] specifies the increment from the beginning of y_i to the beginning of y_(i+1) |
| [in] | c | device pointer or host pointer to scalar cosine component of the rotation matrix. |
| [in] | s | device pointer or host pointer to scalar sine component of the rotation matrix. |
| [in] | batchCount | [int] the number of x and y arrays, i.e. the number of batches. |
| hipblasStatus_t hipblasCscal | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| hipblasComplex * | x, | ||
| int | incx | ||
| ) |
BLAS Level 1 API.
scal scales each element of vector x with scalar alpha.
x := alpha * x
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x. |
| [in] | alpha | device pointer or host pointer for the scalar alpha. |
| [in,out] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| hipblasStatus_t hipblasCscalBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| hipblasComplex *const | x[], | ||
| int | incx, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
scalBatched scales each element of vector x_i with scalar alpha, for i = 1, ... , batchCount.
x_i := alpha * x_i
where (x_i) is the i-th instance of the batch.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i. |
| [in] | alpha | host pointer or device pointer for the scalar alpha. |
| [in,out] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | batchCount | [int] specifies the number of batches in x. |
| hipblasStatus_t hipblasCscalStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
scalStridedBatched scales each element of vector x_i with scalar alpha, for i = 1, ... , batchCount.
x_i := alpha * x_i ,
where (x_i) is the i-th instance of the batch.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i. |
| [in] | alpha | host pointer or device pointer for the scalar alpha. |
| [in,out] | x | device pointer to the first vector (x_1) in the batch. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stride_x, however the user should take care to ensure that stride_x is of appropriate size, for a typical case this means stride_x >= n * incx. |
| [in] | batchCount | [int] specifies the number of batches in x. |
| hipblasStatus_t hipblasCspr | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasComplex * | AP | ||
| ) |
BLAS Level 2 API.
spr performs the matrix-vector operations
A := A + alpha*x*x**T
where alpha is a scalar, x is a vector, and A is an n by n symmetric matrix, supplied in packed form.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of A is supplied in AP. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of A is supplied in AP. |
| [in] | n | [int] the number of rows and columns of matrix A, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in,out] | AP | device pointer storing the packed version of the specified triangular portion of the symmetric matrix A. Of at least size ((n * (n + 1)) / 2). if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of the symmetric matrix A is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(0,1) AP(2) = A(1,1), etc. Ex: (HIPBLAS_FILL_MODE_UPPER; n = 4) 1 2 4 7 2 3 5 8 --—> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] 4 5 6 9 7 8 9 0 if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of the symmetric matrix A is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(1,0) AP(2) = A(2,1), etc. Ex: (HIPBLAS_FILL_MODE_LOWER; n = 4) 1 2 3 4 2 5 6 7 --—> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] 3 6 8 9 4 7 9 0 |
| hipblasStatus_t hipblasCsprBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex *const | x[], | ||
| int | incx, | ||
| hipblasComplex *const | AP[], | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
sprBatched performs the matrix-vector operations
A_i := A_i + alpha*x_i*x_i**T
where alpha is a scalar, x_i is a vector, and A_i is an n by n symmetric matrix, supplied in packed form, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied in AP. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied in AP. |
| [in] | n | [int] the number of rows and columns of each matrix A_i, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in,out] | AP | device array of device pointers storing the packed version of the specified triangular portion of each symmetric matrix A_i of at least size ((n * (n + 1)) / 2). Array is of at least size batchCount. if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of each symmetric matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(0,1) AP(2) = A(1,1), etc. Ex: (HIPBLAS_FILL_MODE_UPPER; n = 4) 1 2 4 7 2 3 5 8 --—> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] 4 5 6 9 7 8 9 0 if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of each symmetric matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(1,0) AP(2) = A(2,1), etc. Ex: (HIPBLAS_FILL_MODE_LOWER; n = 4) 1 2 3 4 2 5 6 7 --—> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] 3 6 8 9 4 7 9 0 |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasCsprStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| hipblasComplex * | AP, | ||
| hipblasStride | strideA, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
sprStridedBatched performs the matrix-vector operations
A_i := A_i + alpha*x_i*x_i**T
where alpha is a scalar, x_i is a vector, and A_i is an n by n symmetric matrix, supplied in packed form, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied in AP. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied in AP. |
| [in] | n | [int] the number of rows and columns of each matrix A_i, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer pointing to the first vector (x_1). |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). |
| [in,out] | AP | device pointer storing the packed version of the specified triangular portion of each symmetric matrix A_i. Points to the first A_1. if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of each symmetric matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(0,1) AP(2) = A(1,1), etc. Ex: (HIPBLAS_FILL_MODE_UPPER; n = 4) 1 2 4 7 2 3 5 8 --—> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] 4 5 6 9 7 8 9 0 if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of each symmetric matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(1,0) AP(2) = A(2,1), etc. Ex: (HIPBLAS_FILL_MODE_LOWER; n = 4) 1 2 3 4 2 5 6 7 --—> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] 3 6 8 9 4 7 9 0 |
| [in] | strideA | [hipblasStride] stride from the start of one (A_i) and the next (A_i+1) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasCsrot | ( | hipblasHandle_t | handle, |
| int | n, | ||
| hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasComplex * | y, | ||
| int | incy, | ||
| const float * | c, | ||
| const float * | s | ||
| ) |
BLAS Level 1 API.
rot applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to vectors x and y. Scalars c and s may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in the x and y vectors. |
| [in,out] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment between elements of x. |
| [in,out] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment between elements of y. |
| [in] | c | device pointer or host pointer storing scalar cosine component of the rotation matrix. |
| [in] | s | device pointer or host pointer storing scalar sine component of the rotation matrix. |
| hipblasStatus_t hipblasCsrotBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| hipblasComplex *const | x[], | ||
| int | incx, | ||
| hipblasComplex *const | y[], | ||
| int | incy, | ||
| const float * | c, | ||
| const float * | s, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
rotBatched applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to batched vectors x_i and y_i, for i = 1, ..., batchCount. Scalars c and s may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each x_i and y_i vectors. |
| [in,out] | x | device array of deivce pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment between elements of each x_i. |
| [in,out] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment between elements of each y_i. |
| [in] | c | device pointer or host pointer to scalar cosine component of the rotation matrix. |
| [in] | s | device pointer or host pointer to scalar sine component of the rotation matrix. |
| [in] | batchCount | [int] the number of x and y arrays, i.e. the number of batches. |
| hipblasStatus_t hipblasCsrotStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| hipblasComplex * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| const float * | c, | ||
| const float * | s, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
rotStridedBatched applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to strided batched vectors x_i and y_i, for i = 1, ..., batchCount. Scalars c and s may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each x_i and y_i vectors. |
| [in,out] | x | device pointer to the first vector x_1. |
| [in] | incx | [int] specifies the increment between elements of each x_i. |
| [in] | stridex | [hipblasStride] specifies the increment from the beginning of x_i to the beginning of x_(i+1) |
| [in,out] | y | device pointer to the first vector y_1. |
| [in] | incy | [int] specifies the increment between elements of each y_i. |
| [in] | stridey | [hipblasStride] specifies the increment from the beginning of y_i to the beginning of y_(i+1) |
| [in] | c | device pointer or host pointer to scalar cosine component of the rotation matrix. |
| [in] | s | device pointer or host pointer to scalar sine component of the rotation matrix. |
| [in] | batchCount | [int] the number of x and y arrays, i.e. the number of batches. |
| hipblasStatus_t hipblasCsscal | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const float * | alpha, | ||
| hipblasComplex * | x, | ||
| int | incx | ||
| ) |
BLAS Level 1 API.
scal scales each element of vector x with scalar alpha.
x := alpha * x
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x. |
| [in] | alpha | device pointer or host pointer for the scalar alpha. |
| [in,out] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| hipblasStatus_t hipblasCsscalBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const float * | alpha, | ||
| hipblasComplex *const | x[], | ||
| int | incx, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
scalBatched scales each element of vector x_i with scalar alpha, for i = 1, ... , batchCount.
x_i := alpha * x_i
where (x_i) is the i-th instance of the batch.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i. |
| [in] | alpha | host pointer or device pointer for the scalar alpha. |
| [in,out] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | batchCount | [int] specifies the number of batches in x. |
| hipblasStatus_t hipblasCsscalStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const float * | alpha, | ||
| hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
scalStridedBatched scales each element of vector x_i with scalar alpha, for i = 1, ... , batchCount.
x_i := alpha * x_i ,
where (x_i) is the i-th instance of the batch.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i. |
| [in] | alpha | host pointer or device pointer for the scalar alpha. |
| [in,out] | x | device pointer to the first vector (x_1) in the batch. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stride_x, however the user should take care to ensure that stride_x is of appropriate size, for a typical case this means stride_x >= n * incx. |
| [in] | batchCount | [int] specifies the number of batches in x. |
| hipblasStatus_t hipblasCswap | ( | hipblasHandle_t | handle, |
| int | n, | ||
| hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasComplex * | y, | ||
| int | incy | ||
| ) |
BLAS Level 1 API.
swap interchanges vectors x and y.
y := x; x := y
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x and y. |
| [in,out] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in,out] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| hipblasStatus_t hipblasCswapBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| hipblasComplex *const | x[], | ||
| int | incx, | ||
| hipblasComplex *const | y[], | ||
| int | incy, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
swapBatched interchanges vectors x_i and y_i, for i = 1 , ... , batchCount
y_i := x_i; x_i := y_i
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i and y_i. |
| [in,out] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in,out] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasCswapStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| hipblasComplex * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
swapStridedBatched interchanges vectors x_i and y_i, for i = 1 , ... , batchCount
y_i := x_i; x_i := y_i
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i and y_i. |
| [in,out] | x | device pointer to the first vector x_1. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stride_x, however the user should take care to ensure that stride_x is of appropriate size, for a typical case this means stride_x >= n * incx. |
| [in,out] | y | device pointer to the first vector y_1. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1). There are no restrictions placed on stride_x, however the user should take care to ensure that stride_y is of appropriate size, for a typical case this means stride_y >= n * incy. stridey should be non zero. |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasCsymm | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| int | m, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| const hipblasComplex * | BP, | ||
| int | ldb, | ||
| const hipblasComplex * | beta, | ||
| hipblasComplex * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
symm performs one of the matrix-matrix operations:
C := alpha*A*B + beta*C if side == HIPBLAS_SIDE_LEFT, C := alpha*B*A + beta*C if side == HIPBLAS_SIDE_RIGHT,
where alpha and beta are scalars, B and C are m by n matrices, and A is a symmetric matrix stored as either upper or lower.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] HIPBLAS_SIDE_LEFT: C := alpha*A*B + beta*C HIPBLAS_SIDE_RIGHT: C := alpha*B*A + beta*C |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix |
| [in] | m | [int] m specifies the number of rows of B and C. m >= 0. |
| [in] | n | [int] n specifies the number of columns of B and C. n >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A and B are not referenced. |
| [in] | AP | pointer storing matrix A on the GPU. A is m by m if side == HIPBLAS_SIDE_LEFT A is n by n if side == HIPBLAS_SIDE_RIGHT only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of A. if side = HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), otherwise lda >= max( 1, n ). |
| [in] | BP | pointer storing matrix B on the GPU. Matrix dimension is m by n |
| [in] | ldb | [int] ldb specifies the first dimension of B. ldb >= max( 1, m ) |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | pointer storing matrix C on the GPU. Matrix dimension is m by n |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, m ) |
| hipblasStatus_t hipblasCsymmBatched | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| int | m, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex *const | AP[], | ||
| int | lda, | ||
| const hipblasComplex *const | BP[], | ||
| int | ldb, | ||
| const hipblasComplex * | beta, | ||
| hipblasComplex *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
symmBatched performs a batch of the matrix-matrix operations:
C_i := alpha*A_i*B_i + beta*C_i if side == HIPBLAS_SIDE_LEFT, C_i := alpha*B_i*A_i + beta*C_i if side == HIPBLAS_SIDE_RIGHT,
where alpha and beta are scalars, B_i and C_i are m by n matrices, and A_i is a symmetric matrix stored as either upper or lower.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] HIPBLAS_SIDE_LEFT: C_i := alpha*A_i*B_i + beta*C_i HIPBLAS_SIDE_RIGHT: C_i := alpha*B_i*A_i + beta*C_i |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: A_i is a lower triangular matrix |
| [in] | m | [int] m specifies the number of rows of B_i and C_i. m >= 0. |
| [in] | n | [int] n specifies the number of columns of B_i and C_i. n >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A_i and B_i are not referenced. |
| [in] | AP | device array of device pointers storing each matrix A_i on the GPU. A_i is m by m if side == HIPBLAS_SIDE_LEFT A_i is n by n if side == HIPBLAS_SIDE_RIGHT only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of A_i. if side = HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), otherwise lda >= max( 1, n ). |
| [in] | BP | device array of device pointers storing each matrix B_i on the GPU. Matrix dimension is m by n |
| [in] | ldb | [int] ldb specifies the first dimension of B_i. ldb >= max( 1, m ) |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C_i need not be set before entry. |
| [in] | CP | device array of device pointers storing each matrix C_i on the GPU. Matrix dimension is m by n |
| [in] | ldc | [int] ldc specifies the first dimension of C_i. ldc >= max( 1, m ) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasCsymmStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| int | m, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const hipblasComplex * | BP, | ||
| int | ldb, | ||
| hipblasStride | strideB, | ||
| const hipblasComplex * | beta, | ||
| hipblasComplex * | CP, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
symmStridedBatched performs a batch of the matrix-matrix operations:
C_i := alpha*A_i*B_i + beta*C_i if side == HIPBLAS_SIDE_LEFT, C_i := alpha*B_i*A_i + beta*C_i if side == HIPBLAS_SIDE_RIGHT,
where alpha and beta are scalars, B_i and C_i are m by n matrices, and A_i is a symmetric matrix stored as either upper or lower.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] HIPBLAS_SIDE_LEFT: C_i := alpha*A_i*B_i + beta*C_i HIPBLAS_SIDE_RIGHT: C_i := alpha*B_i*A_i + beta*C_i |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: A_i is a lower triangular matrix |
| [in] | m | [int] m specifies the number of rows of B_i and C_i. m >= 0. |
| [in] | n | [int] n specifies the number of columns of B_i and C_i. n >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A_i and B_i are not referenced. |
| [in] | AP | device pointer to first matrix A_1 A_i is m by m if side == HIPBLAS_SIDE_LEFT A_i is n by n if side == HIPBLAS_SIDE_RIGHT only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of A_i. if side = HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), otherwise lda >= max( 1, n ). |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | BP | device pointer to first matrix B_1 of dimension (ldb, n) on the GPU. |
| [in] | ldb | [int] ldb specifies the first dimension of B_i. ldb >= max( 1, m ) |
| [in] | strideB | [hipblasStride] stride from the start of one matrix (B_i) and the next one (B_i+1) |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | device pointer to first matrix C_1 of dimension (ldc, n) on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, m ). |
| [in,out] | strideC | [hipblasStride] stride from the start of one matrix (C_i) and the next one (C_i+1) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasCsymv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| const hipblasComplex * | beta, | ||
| hipblasComplex * | y, | ||
| int | incy | ||
| ) |
BLAS Level 2 API.
symv performs the matrix-vector operation:
y := alpha*A*x + beta*y,
where alpha and beta are scalars, x and y are n element vectors and A should contain an upper or lower triangular n by n symmetric matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] |
| [in] | alpha | specifies the scalar alpha |
| [in] | AP | pointer storing matrix A on the GPU |
| [in] | lda | [int] specifies the leading dimension of A |
| [in] | x | pointer storing vector x on the GPU |
| [in] | incx | [int] specifies the increment for the elements of x |
| [in] | beta | specifies the scalar beta |
| [out] | y | pointer storing vector y on the GPU |
| [in] | incy | [int] specifies the increment for the elements of y |
| hipblasStatus_t hipblasCsymvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex *const | AP[], | ||
| int | lda, | ||
| const hipblasComplex *const | x[], | ||
| int | incx, | ||
| const hipblasComplex * | beta, | ||
| hipblasComplex *const | y[], | ||
| int | incy, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
symvBatched performs the matrix-vector operation:
y_i := alpha*A_i*x_i + beta*y_i,
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha and beta are scalars, x_i and y_i are vectors and A_i is an n by n symmetric matrix, for i = 1, ..., batchCount. A a should contain an upper or lower triangular symmetric matrix and the opposing triangular part of A is not referenced
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] number of rows and columns of each matrix A_i |
| [in] | alpha | device pointer or host pointer to scalar alpha |
| [in] | AP | device array of device pointers storing each matrix A_i |
| [in] | lda | [int] specifies the leading dimension of each matrix A_i |
| [in] | x | device array of device pointers storing each vector x_i |
| [in] | incx | [int] specifies the increment for the elements of each vector x_i |
| [in] | beta | device pointer or host pointer to scalar beta |
| [out] | y | device array of device pointers storing each vector y_i |
| [in] | incy | [int] specifies the increment for the elements of each vector y_i |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasCsymvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const hipblasComplex * | beta, | ||
| hipblasComplex * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
symvStridedBatched performs the matrix-vector operation:
y_i := alpha*A_i*x_i + beta*y_i,
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha and beta are scalars, x_i and y_i are vectors and A_i is an n by n symmetric matrix, for i = 1, ..., batchCount. A a should contain an upper or lower triangular symmetric matrix and the opposing triangular part of A is not referenced
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] number of rows and columns of each matrix A_i |
| [in] | alpha | device pointer or host pointer to scalar alpha |
| [in] | AP | Device pointer to the first matrix A_1 on the GPU |
| [in] | lda | [int] specifies the leading dimension of each matrix A_i |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | x | Device pointer to the first vector x_1 on the GPU |
| [in] | incx | [int] specifies the increment for the elements of each vector x_i |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stridex, however the user should take care to ensure that stridex is of appropriate size. This typically means stridex >= n * incx. stridex should be non zero. |
| [in] | beta | device pointer or host pointer to scalar beta |
| [out] | y | Device pointer to the first vector y_1 on the GPU |
| [in] | incy | [int] specifies the increment for the elements of each vector y_i |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1). There are no restrictions placed on stridey, however the user should take care to ensure that stridey is of appropriate size. This typically means stridey >= n * incy. stridey should be non zero. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasCsyr | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasComplex * | AP, | ||
| int | lda | ||
| ) |
BLAS Level 2 API.
syr performs the matrix-vector operations
A := A + alpha*x*x**T
where alpha is a scalar, x is a vector, and A is an n by n symmetric matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] the number of rows and columns of matrix A. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in,out] | AP | device pointer storing matrix A. |
| [in] | lda | [int] specifies the leading dimension of A. |
| hipblasStatus_t hipblasCsyr2 | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| const hipblasComplex * | y, | ||
| int | incy, | ||
| hipblasComplex * | AP, | ||
| int | lda | ||
| ) |
BLAS Level 2 API.
syr2 performs the matrix-vector operations
A := A + alpha*x*y**T + alpha*y*x**T
where alpha is a scalar, x and y are vectors, and A is an n by n symmetric matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] the number of rows and columns of matrix A. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in,out] | AP | device pointer storing matrix A. |
| [in] | lda | [int] specifies the leading dimension of A. |
| hipblasStatus_t hipblasCsyr2Batched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex *const | x[], | ||
| int | incx, | ||
| const hipblasComplex *const | y[], | ||
| int | incy, | ||
| hipblasComplex *const | AP[], | ||
| int | lda, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
syr2Batched performs a batch of matrix-vector operations
A[i] := A[i] + alpha*x[i]*y[i]**T + alpha*y[i]*x[i]**T
where alpha is a scalar, x[i] and y[i] are vectors, and A[i] is a n by n symmetric matrix, for i = 1 , ... , batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] the number of rows and columns of matrix A. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in,out] | AP | device array of device pointers storing each matrix A_i. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasCsyr2k | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| const hipblasComplex * | BP, | ||
| int | ldb, | ||
| const hipblasComplex * | beta, | ||
| hipblasComplex * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
syr2k performs one of the matrix-matrix operations for a symmetric rank-2k update
C := alpha*(op( A )*op( B )^T + op( B )*op( A )^T) + beta*C
where alpha and beta are scalars, op(A) and op(B) are n by k matrix, and C is a symmetric n x n matrix stored as either upper or lower.
op( A ) = A, op( B ) = B, and A and B are n by k if trans == HIPBLAS_OP_N op( A ) = A^T, op( B ) = B^T, and A and B are k by n if trans == HIPBLAS_OP_T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_T: op( A ) = A^T, op( B ) = B^T HIPBLAS_OP_N: op( A ) = A, op( B ) = B |
| [in] | n | [int] n specifies the number of rows and columns of C. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A) and op(B). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | pointer storing matrix A on the GPU. Martrix dimension is ( lda, k ) when if trans = HIPBLAS_OP_N, otherwise (lda, n) only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of A. if trans = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | BP | pointer storing matrix B on the GPU. Martrix dimension is ( ldb, k ) when if trans = HIPBLAS_OP_N, otherwise (ldb, n) only the upper/lower triangular part is accessed. |
| [in] | ldb | [int] ldb specifies the first dimension of B. if trans = HIPBLAS_OP_N, ldb >= max( 1, n ), otherwise ldb >= max( 1, k ). |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | pointer storing matrix C on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| hipblasStatus_t hipblasCsyr2kBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex *const | AP[], | ||
| int | lda, | ||
| const hipblasComplex *const | BP[], | ||
| int | ldb, | ||
| const hipblasComplex * | beta, | ||
| hipblasComplex *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
syr2kBatched performs a batch of the matrix-matrix operations for a symmetric rank-2k update
C_i := alpha*(op( A_i )*op( B_i )^T + op( B_i )*op( A_i )^T) + beta*C_i
where alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrix, and C_i is a symmetric n x n matrix stored as either upper or lower.
op( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == HIPBLAS_OP_N op( A_i ) = A_i^T, op( B_i ) = B_i^T, and A_i and B_i are k by n if trans == HIPBLAS_OP_T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C_i is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_T: op( A_i ) = A_i^T, op( B_i ) = B_i^T HIPBLAS_OP_N: op( A_i ) = A_i, op( B_i ) = B_i |
| [in] | n | [int] n specifies the number of rows and columns of C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | device array of device pointers storing each matrix_i A of dimension (lda, k) when trans is HIPBLAS_OP_N, otherwise of dimension (lda, n) |
| [in] | lda | [int] lda specifies the first dimension of A_i. if trans = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | BP | device array of device pointers storing each matrix_i B of dimension (ldb, k) when trans is HIPBLAS_OP_N, otherwise of dimension (ldb, n) |
| [in] | ldb | [int] ldb specifies the first dimension of B. if trans = HIPBLAS_OP_N, ldb >= max( 1, n ), otherwise ldb >= max( 1, k ). |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | device array of device pointers storing each matrix C_i on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasCsyr2kStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const hipblasComplex * | BP, | ||
| int | ldb, | ||
| hipblasStride | strideB, | ||
| const hipblasComplex * | beta, | ||
| hipblasComplex * | CP, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
syr2kStridedBatched performs a batch of the matrix-matrix operations for a symmetric rank-2k update
C_i := alpha*(op( A_i )*op( B_i )^T + op( B_i )*op( A_i )^T) + beta*C_i
where alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrix, and C_i is a symmetric n x n matrix stored as either upper or lower.
op( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == HIPBLAS_OP_N op( A_i ) = A_i^T, op( B_i ) = B_i^T, and A_i and B_i are k by n if trans == HIPBLAS_OP_T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C_i is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_T: op( A_i ) = A_i^T, op( B_i ) = B_i^T HIPBLAS_OP_N: op( A_i ) = A_i, op( B_i ) = B_i |
| [in] | n | [int] n specifies the number of rows and columns of C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | Device pointer to the first matrix A_1 on the GPU of dimension (lda, k) when trans is HIPBLAS_OP_N, otherwise of dimension (lda, n) |
| [in] | lda | [int] lda specifies the first dimension of A_i. if trans = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | BP | Device pointer to the first matrix B_1 on the GPU of dimension (ldb, k) when trans is HIPBLAS_OP_N, otherwise of dimension (ldb, n) |
| [in] | ldb | [int] ldb specifies the first dimension of B_i. if trans = HIPBLAS_OP_N, ldb >= max( 1, n ), otherwise ldb >= max( 1, k ). |
| [in] | strideB | [hipblasStride] stride from the start of one matrix (B_i) and the next one (B_i+1) |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | Device pointer to the first matrix C_1 on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| [in,out] | strideC | [hipblasStride] stride from the start of one matrix (C_i) and the next one (C_i+1) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasCsyr2StridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const hipblasComplex * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| hipblasComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
syr2StridedBatched the matrix-vector operations
A[i] := A[i] + alpha*x[i]*y[i]**T + alpha*y[i]*x[i]**T
where alpha is a scalar, x[i] and y[i] are vectors, and A[i] is a n by n symmetric matrices, for i = 1 , ... , batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] the number of rows and columns of each matrix A. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer to the first vector x_1. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] specifies the pointer increment between vectors (x_i) and (x_i+1). |
| [in] | y | device pointer to the first vector y_1. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | stridey | [hipblasStride] specifies the pointer increment between vectors (y_i) and (y_i+1). |
| [in,out] | AP | device pointer to the first matrix A_1. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasCsyrBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex *const | x[], | ||
| int | incx, | ||
| hipblasComplex *const | AP[], | ||
| int | lda, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
syrBatched performs a batch of matrix-vector operations
A[i] := A[i] + alpha*x[i]*x[i]**T
where alpha is a scalar, x is an array of vectors, and A is an array of n by n symmetric matrices, for i = 1 , ... , batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] the number of rows and columns of matrix A. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in,out] | AP | device array of device pointers storing each matrix A_i. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasCsyrk | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| const hipblasComplex * | beta, | ||
| hipblasComplex * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
syrk performs one of the matrix-matrix operations for a symmetric rank-k update
C := alpha*op( A )*op( A )^T + beta*C
where alpha and beta are scalars, op(A) is an n by k matrix, and C is a symmetric n x n matrix stored as either upper or lower.
op( A ) = A, and A is n by k if transA == HIPBLAS_OP_N op( A ) = A^T and A is k by n if transA == HIPBLAS_OP_T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_T: op(A) = A^T HIPBLAS_OP_N: op(A) = A HIPBLAS_OP_C: op(A) = A^T |
HIPBLAS_OP_C is not supported for complex types, see cherk and zherk.
| [in] | n | [int] n specifies the number of rows and columns of C. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | pointer storing matrix A on the GPU. Martrix dimension is ( lda, k ) when if transA = HIPBLAS_OP_N, otherwise (lda, n) only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of A. if transA = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | pointer storing matrix C on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| hipblasStatus_t hipblasCsyrkBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex *const | AP[], | ||
| int | lda, | ||
| const hipblasComplex * | beta, | ||
| hipblasComplex *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
syrkBatched performs a batch of the matrix-matrix operations for a symmetric rank-k update
C_i := alpha*op( A_i )*op( A_i )^T + beta*C_i
where alpha and beta are scalars, op(A_i) is an n by k matrix, and C_i is a symmetric n x n matrix stored as either upper or lower.
op( A_i ) = A_i, and A_i is n by k if transA == HIPBLAS_OP_N op( A_i ) = A_i^T and A_i is k by n if transA == HIPBLAS_OP_T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C_i is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_T: op(A) = A^T HIPBLAS_OP_N: op(A) = A HIPBLAS_OP_C: op(A) = A^T |
HIPBLAS_OP_C is not supported for complex types, see cherk and zherk.
| [in] | n | [int] n specifies the number of rows and columns of C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | device array of device pointers storing each matrix_i A of dimension (lda, k) when transA is HIPBLAS_OP_N, otherwise of dimension (lda, n) |
| [in] | lda | [int] lda specifies the first dimension of A_i. if transA = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | device array of device pointers storing each matrix C_i on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasCsyrkStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const hipblasComplex * | beta, | ||
| hipblasComplex * | CP, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
syrkStridedBatched performs a batch of the matrix-matrix operations for a symmetric rank-k update
C_i := alpha*op( A_i )*op( A_i )^T + beta*C_i
where alpha and beta are scalars, op(A_i) is an n by k matrix, and C_i is a symmetric n x n matrix stored as either upper or lower.
op( A_i ) = A_i, and A_i is n by k if transA == HIPBLAS_OP_N op( A_i ) = A_i^T and A_i is k by n if transA == HIPBLAS_OP_T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C_i is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_T: op(A) = A^T HIPBLAS_OP_N: op(A) = A HIPBLAS_OP_C: op(A) = A^T |
HIPBLAS_OP_C is not supported for complex types, see cherk and zherk.
| [in] | n | [int] n specifies the number of rows and columns of C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | Device pointer to the first matrix A_1 on the GPU of dimension (lda, k) when transA is HIPBLAS_OP_N, otherwise of dimension (lda, n) |
| [in] | lda | [int] lda specifies the first dimension of A_i. if transA = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | Device pointer to the first matrix C_1 on the GPU. on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| [in,out] | strideC | [hipblasStride] stride from the start of one matrix (C_i) and the next one (C_i+1) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasCsyrkx | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| const hipblasComplex * | BP, | ||
| int | ldb, | ||
| const hipblasComplex * | beta, | ||
| hipblasComplex * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
syrkx performs one of the matrix-matrix operations for a symmetric rank-k update
C := alpha*op( A )*op( B )^T + beta*C
where alpha and beta are scalars, op(A) and op(B) are n by k matrix, and C is a symmetric n x n matrix stored as either upper or lower. This routine should only be used when the caller can guarantee that the result of op( A )*op( B )^T will be symmetric.
op( A ) = A, op( B ) = B, and A and B are n by k if trans == HIPBLAS_OP_N op( A ) = A^T, op( B ) = B^T, and A and B are k by n if trans == HIPBLAS_OP_T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_T: op( A ) = A^T, op( B ) = B^T HIPBLAS_OP_N: op( A ) = A, op( B ) = B |
| [in] | n | [int] n specifies the number of rows and columns of C. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A) and op(B). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | pointer storing matrix A on the GPU. Martrix dimension is ( lda, k ) when if trans = HIPBLAS_OP_N, otherwise (lda, n) only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of A. if trans = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | BP | pointer storing matrix B on the GPU. Martrix dimension is ( ldb, k ) when if trans = HIPBLAS_OP_N, otherwise (ldb, n) only the upper/lower triangular part is accessed. |
| [in] | ldb | [int] ldb specifies the first dimension of B. if trans = HIPBLAS_OP_N, ldb >= max( 1, n ), otherwise ldb >= max( 1, k ). |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | pointer storing matrix C on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| hipblasStatus_t hipblasCsyrkxBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex *const | AP[], | ||
| int | lda, | ||
| const hipblasComplex *const | BP[], | ||
| int | ldb, | ||
| const hipblasComplex * | beta, | ||
| hipblasComplex *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
syrkxBatched performs a batch of the matrix-matrix operations for a symmetric rank-k update
C_i := alpha*op( A_i )*op( B_i )^T + beta*C_i
where alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrix, and C_i is a symmetric n x n matrix stored as either upper or lower. This routine should only be used when the caller can guarantee that the result of op( A_i )*op( B_i )^T will be symmetric.
op( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == HIPBLAS_OP_N op( A_i ) = A_i^T, op( B_i ) = B_i^T, and A_i and B_i are k by n if trans == HIPBLAS_OP_T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C_i is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_T: op( A_i ) = A_i^T, op( B_i ) = B_i^T HIPBLAS_OP_N: op( A_i ) = A_i, op( B_i ) = B_i |
| [in] | n | [int] n specifies the number of rows and columns of C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | device array of device pointers storing each matrix_i A of dimension (lda, k) when trans is HIPBLAS_OP_N, otherwise of dimension (lda, n) |
| [in] | lda | [int] lda specifies the first dimension of A_i. if trans = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | BP | device array of device pointers storing each matrix_i B of dimension (ldb, k) when trans is HIPBLAS_OP_N, otherwise of dimension (ldb, n) |
| [in] | ldb | [int] ldb specifies the first dimension of B. if trans = HIPBLAS_OP_N, ldb >= max( 1, n ), otherwise ldb >= max( 1, k ). |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | device array of device pointers storing each matrix C_i on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasCsyrkxStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const hipblasComplex * | BP, | ||
| int | ldb, | ||
| hipblasStride | strideB, | ||
| const hipblasComplex * | beta, | ||
| hipblasComplex * | CP, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
syrkxStridedBatched performs a batch of the matrix-matrix operations for a symmetric rank-k update
C_i := alpha*op( A_i )*op( B_i )^T + beta*C_i
where alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrix, and C_i is a symmetric n x n matrix stored as either upper or lower. This routine should only be used when the caller can guarantee that the result of op( A_i )*op( B_i )^T will be symmetric.
op( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == HIPBLAS_OP_N op( A_i ) = A_i^T, op( B_i ) = B_i^T, and A_i and B_i are k by n if trans == HIPBLAS_OP_T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C_i is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_T: op( A_i ) = A_i^T, op( B_i ) = B_i^T HIPBLAS_OP_N: op( A_i ) = A_i, op( B_i ) = B_i |
| [in] | n | [int] n specifies the number of rows and columns of C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | Device pointer to the first matrix A_1 on the GPU of dimension (lda, k) when trans is HIPBLAS_OP_N, otherwise of dimension (lda, n) |
| [in] | lda | [int] lda specifies the first dimension of A_i. if trans = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | BP | Device pointer to the first matrix B_1 on the GPU of dimension (ldb, k) when trans is HIPBLAS_OP_N, otherwise of dimension (ldb, n) |
| [in] | ldb | [int] ldb specifies the first dimension of B_i. if trans = HIPBLAS_OP_N, ldb >= max( 1, n ), otherwise ldb >= max( 1, k ). |
| [in] | strideB | [hipblasStride] stride from the start of one matrix (B_i) and the next one (B_i+1) |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | Device pointer to the first matrix C_1 on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| [in,out] | strideC | [hipblasStride] stride from the start of one matrix (C_i) and the next one (C_i+1) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasCsyrStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| hipblasComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
syrStridedBatched performs the matrix-vector operations
A[i] := A[i] + alpha*x[i]*x[i]**T
where alpha is a scalar, vectors, and A is an array of n by n symmetric matrices, for i = 1 , ... , batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] the number of rows and columns of each matrix A. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer to the first vector x_1. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] specifies the pointer increment between vectors (x_i) and (x_i+1). |
| [in,out] | AP | device pointer to the first matrix A_1. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasCtbmv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | k, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| hipblasComplex * | x, | ||
| int | incx | ||
| ) |
BLAS Level 2 API.
tbmv performs one of the matrix-vector operations
x := A*x or x := A**T*x or x := A**H*x,
x is a vectors and A is a banded m by m matrix (see description below).
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper banded triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower banded triangular matrix. |
| [in] | transA | [hipblasOperation_t] indicates whether matrix A is tranposed (conjugated) or not. |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: The main diagonal of A is assumed to consist of only 1's and is not referenced. HIPBLAS_DIAG_NON_UNIT: No assumptions are made of A's main diagonal. |
| [in] | m | [int] the number of rows and columns of the matrix represented by A. |
| [in] | k | [int] if uplo == HIPBLAS_FILL_MODE_UPPER, k specifies the number of super-diagonals of the matrix A. if uplo == HIPBLAS_FILL_MODE_LOWER, k specifies the number of sub-diagonals of the matrix A. k must satisfy k > 0 && k < lda. |
| [in] | AP | device pointer storing banded triangular matrix A. if uplo == HIPBLAS_FILL_MODE_UPPER: The matrix represented is an upper banded triangular matrix with the main diagonal and k super-diagonals, everything else can be assumed to be 0. The matrix is compacted so that the main diagonal resides on the k'th row, the first super diagonal resides on the RHS of the k-1'th row, etc, with the k'th diagonal on the RHS of the 0'th row. Ex: (HIPBLAS_FILL_MODE_UPPER; m = 5; k = 2) 1 6 9 0 0 0 0 9 8 7 0 2 7 8 0 0 6 7 8 9 0 0 3 8 7 -—> 1 2 3 4 5 0 0 0 4 9 0 0 0 0 0 0 0 0 0 5 0 0 0 0 0 if uplo == HIPBLAS_FILL_MODE_LOWER: The matrix represnted is a lower banded triangular matrix with the main diagonal and k sub-diagonals, everything else can be assumed to be 0. The matrix is compacted so that the main diagonal resides on the 0'th row, working up to the k'th diagonal residing on the LHS of the k'th row. Ex: (HIPBLAS_FILL_MODE_LOWER; m = 5; k = 2) 1 0 0 0 0 1 2 3 4 5 6 2 0 0 0 6 7 8 9 0 9 7 3 0 0 -—> 9 8 7 0 0 0 8 8 4 0 0 0 0 0 0 0 0 7 9 5 0 0 0 0 0 |
| [in] | lda | [int] specifies the leading dimension of A. lda must satisfy lda > k. |
| [in,out] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| hipblasStatus_t hipblasCtbmvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | k, | ||
| const hipblasComplex *const | AP[], | ||
| int | lda, | ||
| hipblasComplex *const | x[], | ||
| int | incx, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
tbmvBatched performs one of the matrix-vector operations
x_i := A_i*x_i or x_i := A_i**T*x_i or x_i := A_i**H*x_i,
where (A_i, x_i) is the i-th instance of the batch. x_i is a vector and A_i is an m by m matrix, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: each A_i is an upper banded triangular matrix. HIPBLAS_FILL_MODE_LOWER: each A_i is a lower banded triangular matrix. |
| [in] | transA | [hipblasOperation_t] indicates whether each matrix A_i is tranposed (conjugated) or not. |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: The main diagonal of each A_i is assumed to consist of only 1's and is not referenced. HIPBLAS_DIAG_NON_UNIT: No assumptions are made of each A_i's main diagonal. |
| [in] | m | [int] the number of rows and columns of the matrix represented by each A_i. |
| [in] | k | [int] if uplo == HIPBLAS_FILL_MODE_UPPER, k specifies the number of super-diagonals of each matrix A_i. if uplo == HIPBLAS_FILL_MODE_LOWER, k specifies the number of sub-diagonals of each matrix A_i. k must satisfy k > 0 && k < lda. |
| [in] | AP | device array of device pointers storing each banded triangular matrix A_i. if uplo == HIPBLAS_FILL_MODE_UPPER: The matrix represented is an upper banded triangular matrix with the main diagonal and k super-diagonals, everything else can be assumed to be 0. The matrix is compacted so that the main diagonal resides on the k'th row, the first super diagonal resides on the RHS of the k-1'th row, etc, with the k'th diagonal on the RHS of the 0'th row. Ex: (HIPBLAS_FILL_MODE_UPPER; m = 5; k = 2) 1 6 9 0 0 0 0 9 8 7 0 2 7 8 0 0 6 7 8 9 0 0 3 8 7 -—> 1 2 3 4 5 0 0 0 4 9 0 0 0 0 0 0 0 0 0 5 0 0 0 0 0 if uplo == HIPBLAS_FILL_MODE_LOWER: The matrix represnted is a lower banded triangular matrix with the main diagonal and k sub-diagonals, everything else can be assumed to be 0. The matrix is compacted so that the main diagonal resides on the 0'th row, working up to the k'th diagonal residing on the LHS of the k'th row. Ex: (HIPBLAS_FILL_MODE_LOWER; m = 5; k = 2) 1 0 0 0 0 1 2 3 4 5 6 2 0 0 0 6 7 8 9 0 9 7 3 0 0 -—> 9 8 7 0 0 0 8 8 4 0 0 0 0 0 0 0 0 7 9 5 0 0 0 0 0 |
| [in] | lda | [int] specifies the leading dimension of each A_i. lda must satisfy lda > k. |
| [in,out] | x | device array of device pointer storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasCtbmvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | k, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
tbmvStridedBatched performs one of the matrix-vector operations
x_i := A_i*x_i or x_i := A_i**T*x_i or x_i := A_i**H*x_i,
where (A_i, x_i) is the i-th instance of the batch. x_i is a vector and A_i is an m by m matrix, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: each A_i is an upper banded triangular matrix. HIPBLAS_FILL_MODE_LOWER: each A_i is a lower banded triangular matrix. |
| [in] | transA | [hipblasOperation_t] indicates whether each matrix A_i is tranposed (conjugated) or not. |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: The main diagonal of each A_i is assumed to consist of only 1's and is not referenced. HIPBLAS_DIAG_NON_UNIT: No assumptions are made of each A_i's main diagonal. |
| [in] | m | [int] the number of rows and columns of the matrix represented by each A_i. |
| [in] | k | [int] if uplo == HIPBLAS_FILL_MODE_UPPER, k specifies the number of super-diagonals of each matrix A_i. if uplo == HIPBLAS_FILL_MODE_LOWER, k specifies the number of sub-diagonals of each matrix A_i. k must satisfy k > 0 && k < lda. |
| [in] | AP | device array to the first matrix A_i of the batch. Stores each banded triangular matrix A_i. if uplo == HIPBLAS_FILL_MODE_UPPER: The matrix represented is an upper banded triangular matrix with the main diagonal and k super-diagonals, everything else can be assumed to be 0. The matrix is compacted so that the main diagonal resides on the k'th row, the first super diagonal resides on the RHS of the k-1'th row, etc, with the k'th diagonal on the RHS of the 0'th row. Ex: (HIPBLAS_FILL_MODE_UPPER; m = 5; k = 2) 1 6 9 0 0 0 0 9 8 7 0 2 7 8 0 0 6 7 8 9 0 0 3 8 7 -—> 1 2 3 4 5 0 0 0 4 9 0 0 0 0 0 0 0 0 0 5 0 0 0 0 0 if uplo == HIPBLAS_FILL_MODE_LOWER: The matrix represnted is a lower banded triangular matrix with the main diagonal and k sub-diagonals, everything else can be assumed to be 0. The matrix is compacted so that the main diagonal resides on the 0'th row, working up to the k'th diagonal residing on the LHS of the k'th row. Ex: (HIPBLAS_FILL_MODE_LOWER; m = 5; k = 2) 1 0 0 0 0 1 2 3 4 5 6 2 0 0 0 6 7 8 9 0 9 7 3 0 0 -—> 9 8 7 0 0 0 8 8 4 0 0 0 0 0 0 0 0 7 9 5 0 0 0 0 0 |
| [in] | lda | [int] specifies the leading dimension of each A_i. lda must satisfy lda > k. |
| [in] | strideA | [hipblasStride] stride from the start of one A_i matrix to the next A_(i + 1). |
| [in,out] | x | device array to the first vector x_i of the batch. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one x_i matrix to the next x_(i + 1). |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasCtbsv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | n, | ||
| int | k, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| hipblasComplex * | x, | ||
| int | incx | ||
| ) |
BLAS Level 2 API.
tbsv solves
A*x = b or A**T*x = b or A**H*x = b,
where x and b are vectors and A is a banded triangular matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: Solves A*x = b HIPBLAS_OP_T: Solves A**T*x = b HIPBLAS_OP_C: Solves A**H*x = b |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular (i.e. the diagonal elements of A are not used in computations). HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | n | [int] n specifies the number of rows of b. n >= 0. |
| [in] | k | [int] if(uplo == HIPBLAS_FILL_MODE_UPPER) k specifies the number of super-diagonals of A. if(uplo == HIPBLAS_FILL_MODE_LOWER) k specifies the number of sub-diagonals of A. k >= 0. |
| [in] | AP | device pointer storing the matrix A in banded format. |
| [in] | lda | [int] specifies the leading dimension of A. lda >= (k + 1). |
| [in,out] | x | device pointer storing input vector b. Overwritten by the output vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| hipblasStatus_t hipblasCtbsvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | n, | ||
| int | k, | ||
| const hipblasComplex *const | AP[], | ||
| int | lda, | ||
| hipblasComplex *const | x[], | ||
| int | incx, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
tbsvBatched solves
A_i*x_i = b_i or A_i**T*x_i = b_i or A_i**H*x_i = b_i,
where x_i and b_i are vectors and A_i is a banded triangular matrix, for i = [1, batchCount].
The input vectors b_i are overwritten by the output vectors x_i.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: Solves A_i*x_i = b_i HIPBLAS_OP_T: Solves A_i**T*x_i = b_i HIPBLAS_OP_C: Solves A_i**H*x_i = b_i |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: each A_i is assumed to be unit triangular (i.e. the diagonal elements of each A_i are not used in computations). HIPBLAS_DIAG_NON_UNIT: each A_i is not assumed to be unit triangular. |
| [in] | n | [int] n specifies the number of rows of each b_i. n >= 0. |
| [in] | k | [int] if(uplo == HIPBLAS_FILL_MODE_UPPER) k specifies the number of super-diagonals of each A_i. if(uplo == HIPBLAS_FILL_MODE_LOWER) k specifies the number of sub-diagonals of each A_i. k >= 0. |
| [in] | AP | device vector of device pointers storing each matrix A_i in banded format. |
| [in] | lda | [int] specifies the leading dimension of each A_i. lda >= (k + 1). |
| [in,out] | x | device vector of device pointers storing each input vector b_i. Overwritten by each output vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasCtbsvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | n, | ||
| int | k, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
tbsvStridedBatched solves
A_i*x_i = b_i or A_i**T*x_i = b_i or A_i**H*x_i = b_i,
where x_i and b_i are vectors and A_i is a banded triangular matrix, for i = [1, batchCount].
The input vectors b_i are overwritten by the output vectors x_i.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: Solves A_i*x_i = b_i HIPBLAS_OP_T: Solves A_i**T*x_i = b_i HIPBLAS_OP_C: Solves A_i**H*x_i = b_i |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: each A_i is assumed to be unit triangular (i.e. the diagonal elements of each A_i are not used in computations). HIPBLAS_DIAG_NON_UNIT: each A_i is not assumed to be unit triangular. |
| [in] | n | [int] n specifies the number of rows of each b_i. n >= 0. |
| [in] | k | [int] if(uplo == HIPBLAS_FILL_MODE_UPPER) k specifies the number of super-diagonals of each A_i. if(uplo == HIPBLAS_FILL_MODE_LOWER) k specifies the number of sub-diagonals of each A_i. k >= 0. |
| [in] | AP | device pointer pointing to the first banded matrix A_1. |
| [in] | lda | [int] specifies the leading dimension of each A_i. lda >= (k + 1). |
| [in] | strideA | [hipblasStride] specifies the distance between the start of one matrix (A_i) and the next (A_i+1). |
| [in,out] | x | device pointer pointing to the first input vector b_1. Overwritten by output vectors x. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] specifies the distance between the start of one vector (x_i) and the next (x_i+1). |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasCtpmv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const hipblasComplex * | AP, | ||
| hipblasComplex * | x, | ||
| int | incx | ||
| ) |
BLAS Level 2 API.
tpmv performs one of the matrix-vector operations
x = A*x or x = A**T*x,
where x is an n element vector and A is an n by n unit, or non-unit, upper or lower triangular matrix, supplied in the pack form.
The vector x is overwritten.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of A. m >= 0. |
| [in] | AP | device pointer storing matrix A, of dimension at leat ( m * ( m + 1 ) / 2 ). Before entry with uplo = HIPBLAS_FILL_MODE_UPPER, the array A must contain the upper triangular matrix packed sequentially, column by column, so that A[0] contains a_{0,0}, A[1] and A[2] contain a_{0,1} and a_{1, 1} respectively, and so on. Before entry with uplo = HIPBLAS_FILL_MODE_LOWER, the array A must contain the lower triangular matrix packed sequentially, column by column, so that A[0] contains a_{0,0}, A[1] and A[2] contain a_{1,0} and a_{2,0} respectively, and so on. Note that when DIAG = HIPBLAS_DIAG_UNIT, the diagonal elements of A are not referenced, but are assumed to be unity. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. incx must not be zero. |
| hipblasStatus_t hipblasCtpmvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const hipblasComplex *const | AP[], | ||
| hipblasComplex *const | x[], | ||
| int | incx, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
tpmvBatched performs one of the matrix-vector operations
x_i = A_i*x_i or x_i = A**T*x_i, 0 \le i < batchCount
where x_i is an n element vector and A_i is an n by n (unit, or non-unit, upper or lower triangular matrix)
The vectors x_i are overwritten.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A_i is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of matrices A_i. m >= 0. |
| [in] | AP | device pointer storing pointer of matrices A_i, of dimension ( lda, m ) |
| [in] | x | device pointer storing vectors x_i. |
| [in] | incx | [int] specifies the increment for the elements of vectors x_i. |
| [in] | batchCount | [int] The number of batched matrices/vectors. |
| hipblasStatus_t hipblasCtpmvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const hipblasComplex * | AP, | ||
| hipblasStride | strideA, | ||
| hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
tpmvStridedBatched performs one of the matrix-vector operations
x_i = A_i*x_i or x_i = A**T*x_i, 0 \le i < batchCount
where x_i is an n element vector and A_i is an n by n (unit, or non-unit, upper or lower triangular matrix) with strides specifying how to retrieve $x_i$ (resp. $A_i$) from $x_{i-1}$ (resp. $A_i$).
The vectors x_i are overwritten.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A_i is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of matrices A_i. m >= 0. |
| [in] | AP | device pointer of the matrix A_0, of dimension ( lda, m ) |
| [in] | strideA | [hipblasStride] stride from the start of one A_i matrix to the next A_{i + 1} |
| [in] | x | device pointer storing the vector x_0. |
| [in] | incx | [int] specifies the increment for the elements of one vector x. |
| [in] | stridex | [hipblasStride] stride from the start of one x_i vector to the next x_{i + 1} |
| [in] | batchCount | [int] The number of batched matrices/vectors. |
| hipblasStatus_t hipblasCtpsv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const hipblasComplex * | AP, | ||
| hipblasComplex * | x, | ||
| int | incx | ||
| ) |
BLAS Level 2 API.
tpsv solves
A*x = b or A**T*x = b, or A**H*x = b,
where x and b are vectors and A is a triangular matrix stored in the packed format.
The input vector b is overwritten by the output vector x.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: Solves A*x = b HIPBLAS_OP_T: Solves A**T*x = b HIPBLAS_OP_C: Solves A**H*x = b |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular (i.e. the diagonal elements of A are not used in computations). HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of b. m >= 0. |
| [in] | AP | device pointer storing the packed version of matrix A, of dimension >= (n * (n + 1) / 2) |
| [in,out] | x | device pointer storing vector b on input, overwritten by x on output. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| hipblasStatus_t hipblasCtpsvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const hipblasComplex *const | AP[], | ||
| hipblasComplex *const | x[], | ||
| int | incx, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
tpsvBatched solves
A_i*x_i = b_i or A_i**T*x_i = b_i, or A_i**H*x_i = b_i,
where x_i and b_i are vectors and A_i is a triangular matrix stored in the packed format, for i in [1, batchCount].
The input vectors b_i are overwritten by the output vectors x_i.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: each A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: each A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: Solves A*x = b HIPBLAS_OP_T: Solves A**T*x = b HIPBLAS_OP_C: Solves A**H*x = b |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: each A_i is assumed to be unit triangular (i.e. the diagonal elements of each A_i are not used in computations). HIPBLAS_DIAG_NON_UNIT: each A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of each b_i. m >= 0. |
| [in] | AP | device array of device pointers storing the packed versions of each matrix A_i, of dimension >= (n * (n + 1) / 2) |
| [in,out] | x | device array of device pointers storing each input vector b_i, overwritten by x_i on output. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | batchCount | [int] specifies the number of instances in the batch. |
| hipblasStatus_t hipblasCtpsvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const hipblasComplex * | AP, | ||
| hipblasStride | strideA, | ||
| hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
tpsvStridedBatched solves
A_i*x_i = b_i or A_i**T*x_i = b_i, or A_i**H*x_i = b_i,
where x_i and b_i are vectors and A_i is a triangular matrix stored in the packed format, for i in [1, batchCount].
The input vectors b_i are overwritten by the output vectors x_i.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: each A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: each A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: Solves A*x = b HIPBLAS_OP_T: Solves A**T*x = b HIPBLAS_OP_C: Solves A**H*x = b |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: each A_i is assumed to be unit triangular (i.e. the diagonal elements of each A_i are not used in computations). HIPBLAS_DIAG_NON_UNIT: each A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of each b_i. m >= 0. |
| [in] | AP | device pointer pointing to the first packed matrix A_1, of dimension >= (n * (n + 1) / 2) |
| [in] | strideA | [hipblasStride] stride from the beginning of one packed matrix (AP_i) and the next (AP_i+1). |
| [in,out] | x | device pointer pointing to the first input vector b_1. Overwritten by each x_i on output. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the beginning of one vector (x_i) and the next (x_i+1). |
| [in] | batchCount | [int] specifies the number of instances in the batch. |
| hipblasStatus_t hipblasCtrmm | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| hipblasComplex * | BP, | ||
| int | ldb | ||
| ) |
BLAS Level 3 API.
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasStrmm(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const float* alpha,
const float* AP,
int lda,
float* BP,
int ldb);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasStrmmOutofplace(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const float* alpha,
const float* AP,
int lda,
const float* BP,
int ldb,
float* CP,
int ldc);
#endif
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasDtrmm(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const double* alpha,
const double* AP,
int lda,
double* BP,
int ldb);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasDtrmmOutofplace(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const double* alpha,
const double* AP,
int lda,
const double* BP,
int ldb,
double* CP,
int ldc);
#endif
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasCtrmm(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasComplex* alpha,
const hipblasComplex* AP,
int lda,
hipblasComplex* BP,
int ldb);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasCtrmmOutofplace(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasComplex* alpha,
const hipblasComplex* AP,
int lda,
const hipblasComplex* BP,
int ldb,
hipblasComplex* CP,
int ldc);
#endif
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasZtrmm(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasDoubleComplex* alpha,
const hipblasDoubleComplex* AP,
int lda,
hipblasDoubleComplex* BP,
int ldb);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasZtrmmOutofplace(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasDoubleComplex* alpha,
const hipblasDoubleComplex* AP,
int lda,
const hipblasDoubleComplex* BP,
int ldb,
hipblasDoubleComplex* CP,
int ldc);
#endif
The deprecated Legacy BLAS in-place trmm performs one of the matrix-matrix operations:
B := alpha*op( A )*B, or B := alpha*B*op( A ),
The new trmm performs one of the matrix-matrix operations:
C := alpha*op( A )*B, or C := alpha*B*op( A ),
The in-place functionality is still available in the new trmmm by setting pointer C equal to pointer B, and ldc equal to ldb.
alpha is a scalar, B is an m by n matrix, C is an m by n matrix, A is a unit, or non-unit, upper or lower triangular matrix and op( A ) is one of op( A ) = A or op( A ) = A^T or op( A ) = A^H. When uplo == rocblas_fill_upper the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Here k is m when side == rocblas_side_left and is n when side == rocblas_side_right. When uplo == rocblas_fill_lower the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Here k is m when side == rocblas_side_left and is n when side == rocblas_side_right. Note that when diag == rocblas_diagonal_unit the diagonal elements of A are not referenced either, but are assumed to be unity.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] Specifies whether op(A) multiplies B from the left or right as follows: HIPBLAS_SIDE_LEFT: B := alpha*op( A )*B. HIPBLAS_SIDE_RIGHT: B := alpha*B*op( A ). |
| [in] | uplo | [hipblasFillMode_t] Specifies whether the matrix A is an upper or lower triangular matrix as follows: HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] Specifies the form of op(A) to be used in the matrix multiplication as follows: HIPBLAS_OP_N: op(A) = A. HIPBLAS_OP_T: op(A) = A^T. HIPBLAS_OP_C: op(A) = A^H. |
| [in] | diag | [hipblasDiagType_t] Specifies whether or not A is unit triangular as follows: HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of B. m >= 0. |
| [in] | n | [int] n specifies the number of columns of B. n >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. |
| [in] | AP | Device pointer to matrix A on the GPU. A has dimension ( lda, k ), where k is m when side == HIPBLAS_SIDE_LEFT and is n when side == HIPBLAS_SIDE_RIGHT. |
When uplo == HIPBLAS_FILL_MODE_UPPER the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced.
When uplo == HIPBLAS_FILL_MODE_LOWER the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced.
Note that when diag == HIPBLAS_DIAG_UNIT the diagonal elements of A are not referenced either, but are assumed to be unity.
| [in] | lda | [int] lda specifies the first dimension of A. if side == HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), if side == HIPBLAS_SIDE_RIGHT, lda >= max( 1, n ). |
| [in,out] | BP | Device pointer to the first matrix B_0 on the GPU. On entry, the leading m by n part of the array B must contain the matrix B, and on exit is overwritten by the transformed matrix. |
| [in] | ldb | [int] ldb specifies the first dimension of B. ldb >= max( 1, m ). |
| hipblasStatus_t hipblasCtrmmBatched | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex *const | AP[], | ||
| int | lda, | ||
| hipblasComplex *const | BP[], | ||
| int | ldb, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
The hipBLAS trmm_batched API is from Legacy BLAS and it supports only in-place functionality. It is deprecated and it will be replaced with an API that supports both in-place and out-of-place functionality. The new API is available in hipBLAS versions 1.x.x and later. To get the new API compile with the directive -DHIPBLAS_V1.
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasStrmmBatched(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const float* alpha,
const float* const AP[],
int lda,
float* const BP[],
int ldb,
int batchCount);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasStrmmBatchedOutofplace(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const float* alpha,
const float* const AP[],
int lda,
const float* const BP[],
int ldb,
float* const CP[],
int ldc,
int batchCount);
#endif
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasDtrmmBatched(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const double* alpha,
const double* const AP[],
int lda,
double* const BP[],
int ldb,
int batchCount);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasDtrmmBatchedOutofplace(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const double* alpha,
const double* const AP[],
int lda,
const double* const BP[],
int ldb,
double* const CP[],
int ldc,
int batchCount);
#endif
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasCtrmmBatched(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasComplex* alpha,
const hipblasComplex* const AP[],
int lda,
hipblasComplex* const BP[],
int ldb,
int batchCount);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasCtrmmBatchedOutofplace(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasComplex* alpha,
const hipblasComplex* const AP[],
int lda,
const hipblasComplex* const BP[],
int ldb,
hipblasComplex* const CP[],
int ldc,
int batchCount);
#endif
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasZtrmmBatched(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasDoubleComplex* alpha,
const hipblasDoubleComplex* const AP[],
int lda,
hipblasDoubleComplex* const BP[],
int ldb,
int batchCount);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasZtrmmBatchedOutofplace(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasDoubleComplex* alpha,
const hipblasDoubleComplex* const AP[],
int lda,
const hipblasDoubleComplex* const BP[],
int ldb,
hipblasDoubleComplex* const CP[],
int ldc,
int batchCount);
#endif
The deprecated Legacy BLAS in-place trmm_batched performs one of the batched matrix-matrix operations:
B_i := alpha*op( A_i )*B_i, or B_i := alpha*B_i*op( A_i ) for i = 0, 1, ... batch_count -1,
The new trmm_batched performs one of the matrix-matrix operations:
C_i := alpha*op( A_i )*B_i, or C_i := alpha*B_i*op( A_i ) for i = 0, 1, ... batch_count -1,
The in-place functionality is still available in the new trmmm_batched by setting pointer C equal to pointer B and ldc equal to ldb.
alpha is a scalar, B_i is an m by n matrix, C_i is an m by n matrix, A_i is a unit, or non-unit, upper or lower triangular matrix and op( A_i ) is one of op( A_i ) = A_i or op( A_i ) = A_i^T or op( A_i ) = A_i^H.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] Specifies whether op(A_i) multiplies B_i from the left or right as follows: HIPBLAS_SIDE_LEFT: B_i := alpha*op( A_i )*B_i. HIPBLAS_SIDE_RIGHT: B_i := alpha*B_i*op( A_i ). |
| [in] | uplo | [hipblasFillMode_t] Specifies whether the matrix A is an upper or lower triangular matrix as follows: HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] Specifies the form of op(A_i) to be used in the matrix multiplication as follows: HIPBLAS_OP_N: op(A_i) = A_i. HIPBLAS_OP_T: op(A_i) = A_i^T. HIPBLAS_OP_C: op(A_i) = A_i^H. |
| [in] | diag | [hipblasDiagType_t] Specifies whether or not A_i is unit triangular as follows: HIPBLAS_DIAG_UNIT: A_i is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of B_i. m >= 0. |
| [in] | n | [int] n specifies the number of columns of B_i. n >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A_i is not referenced and B_i need not be set before entry. |
| [in] | AP | Device array of device pointers storing each matrix A_i on the GPU. Each A_i is of dimension ( lda, k ), where k is m when side == HIPBLAS_SIDE_LEFT and is n when side == HIPBLAS_SIDE_RIGHT. |
When uplo == HIPBLAS_FILL_MODE_UPPER the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced.
When uplo == HIPBLAS_FILL_MODE_LOWER the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced.
Note that when diag == HIPBLAS_DIAG_UNIT the diagonal elements of A_i are not referenced either, but are assumed to be unity.
| [in] | lda | [int] lda specifies the first dimension of A. if side == HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), if side == HIPBLAS_SIDE_RIGHT, lda >= max( 1, n ). |
| [in,out] | BP | device array of device pointers storing each matrix B_i on the GPU. On entry, the leading m by n part of the array B_i must contain the matrix B_i, and on exit is overwritten by the transformed matrix. |
| [in] | ldb | [int] ldb specifies the first dimension of B_i. ldb >= max( 1, m ). |
| [in] | batchCount | [int] number of instances i in the batch. |
| hipblasStatus_t hipblasCtrmmBatchedOutofplace | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex *const | AP[], | ||
| int | lda, | ||
| const hipblasComplex *const | BP[], | ||
| int | ldb, | ||
| hipblasComplex *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
| hipblasStatus_t hipblasCtrmmOutofplace | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| const hipblasComplex * | BP, | ||
| int | ldb, | ||
| hipblasComplex * | CP, | ||
| int | ldc | ||
| ) |
| hipblasStatus_t hipblasCtrmmStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| hipblasComplex * | BP, | ||
| int | ldb, | ||
| hipblasStride | strideB, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
The hipBLAS trmm_strided_batched API is from Legacy BLAS and it supports only in-place functionality. It is deprecated and it will be replaced with an API that supports both in-place and out-of-place functionality. The new API is available in hipBLAS versions 1.x.x and later. To get the new API compile with the directive -DHIPBLAS_V1.
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasStrmmStridedBatched(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const float* alpha,
const float* AP,
int lda,
hipblasStride strideA,
float* BP,
int ldb,
hipblasStride strideB,
int batchCount);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasStrmmStridedBatchedOutofplace(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const float* alpha,
const float* AP,
int lda,
hipblasStride strideA,
const float* BP,
int ldb,
hipblasStride strideB,
float* CP,
int ldc,
hipblasStride strideC,
int batchCount);
#endif
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasDtrmmStridedBatched(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const double* alpha,
const double* AP,
int lda,
hipblasStride strideA,
double* BP,
int ldb,
hipblasStride strideB,
int batchCount);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasDtrmmStridedBatchedOutofplace(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const double* alpha,
const double* AP,
int lda,
hipblasStride strideA,
const double* BP,
int ldb,
hipblasStride strideB,
double* CP,
int ldc,
hipblasStride strideC,
int batchCount);
#endif
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasCtrmmStridedBatched(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasComplex* alpha,
const hipblasComplex* AP,
int lda,
hipblasStride strideA,
hipblasComplex* BP,
int ldb,
hipblasStride strideB,
int batchCount);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasCtrmmStridedBatchedOutofplace(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasComplex* alpha,
const hipblasComplex* AP,
int lda,
hipblasStride strideA,
const hipblasComplex* BP,
int ldb,
hipblasStride strideB,
hipblasComplex* CP,
int ldc,
hipblasStride strideC,
int batchCount);
#endif
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasZtrmmStridedBatched(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasDoubleComplex* alpha,
const hipblasDoubleComplex* AP,
int lda,
hipblasStride strideA,
hipblasDoubleComplex* BP,
int ldb,
hipblasStride strideB,
int batchCount);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasZtrmmStridedBatchedOutofplace(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasDoubleComplex* alpha,
const hipblasDoubleComplex* AP,
int lda,
hipblasStride strideA,
const hipblasDoubleComplex* BP,
int ldb,
hipblasStride strideB,
hipblasDoubleComplex* BC,
int ldc,
hipblasStride strideC,
int batchCount);
#endif
The deprecated Legacy BLAS in-place trmm_strided_batched performs one of the strided_batched matrix-matrix operations:
B_i := alpha*op( A_i )*B_i, or B_i := alpha*B_i*op( A_i ) for i = 0, 1, ... batch_count -1,
The new trmm_batched performs one of the matrix-matrix operations:
C_i := alpha*op( A_i )*B_i, or C_i := alpha*B_i*op( A_i ) for i = 0, 1, ... batch_count -1,
The in-place functionality is still available in the new trmmm_batched by setting pointer C equal to pointer B, setting ldc equal to ldb, and setting stride_C equal to stride_B.
alpha is a scalar, B_i is an m by n matrix, C_i is an m by n matrix, A_i is a unit, or non-unit, upper or lower triangular matrix and op( A_i ) is one of op( A_i ) = A_i or op( A_i ) = A_i^T or op( A_i ) = A_i^H.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] Specifies whether op(A_i) multiplies B_i from the left or right as follows: HIPBLAS_SIDE_LEFT: B_i := alpha*op( A_i )*B_i. HIPBLAS_SIDE_RIGHT: B_i := alpha*B_i*op( A_i ). |
| [in] | uplo | [hipblasFillMode_t] Specifies whether the matrix A is an upper or lower triangular matrix as follows: HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] Specifies the form of op(A_i) to be used in the matrix multiplication as follows: HIPBLAS_OP_N: op(A_i) = A_i. HIPBLAS_OP_T: op(A_i) = A_i^T. HIPBLAS_OP_C: op(A_i) = A_i^H. |
| [in] | diag | [hipblasDiagType_t] Specifies whether or not A_i is unit triangular as follows: HIPBLAS_DIAG_UNIT: A_i is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of B_i. m >= 0. |
| [in] | n | [int] n specifies the number of columns of B_i. n >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A_i is not referenced and B_i need not be set before entry. |
| [in] | AP | Device pointer to the first matrix A_0 on the GPU. Each A_i is of dimension ( lda, k ), where k is m when side == HIPBLAS_SIDE_LEFT and is n when side == HIPBLAS_SIDE_RIGHT. |
When uplo == HIPBLAS_FILL_MODE_UPPER the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced.
When uplo == HIPBLAS_FILL_MODE_LOWER the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced.
Note that when diag == HIPBLAS_DIAG_UNIT the diagonal elements of A_i are not referenced either, but are assumed to be unity.
| [in] | lda | [int] lda specifies the first dimension of A. if side == HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), if side == HIPBLAS_SIDE_RIGHT, lda >= max( 1, n ). |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in,out] | BP | Device pointer to the first matrix B_0 on the GPU. On entry, the leading m by n part of the array B_i must contain the matrix B_i, and on exit is overwritten by the transformed matrix. |
| [in] | ldb | [int] ldb specifies the first dimension of B_i. ldb >= max( 1, m ). |
| [in] | strideB | [hipblasStride] stride from the start of one matrix (B_i) and the next one (B_i+1) |
| [in] | batchCount | [int] number of instances i in the batch. |
| hipblasStatus_t hipblasCtrmmStridedBatchedOutofplace | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const hipblasComplex * | BP, | ||
| int | ldb, | ||
| hipblasStride | strideB, | ||
| hipblasComplex * | CP, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount | ||
| ) |
| hipblasStatus_t hipblasCtrmv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| hipblasComplex * | x, | ||
| int | incx | ||
| ) |
BLAS Level 2 API.
trmv performs one of the matrix-vector operations
x = A*x or x = A**T*x,
where x is an n element vector and A is an n by n unit, or non-unit, upper or lower triangular matrix.
The vector x is overwritten.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of A. m >= 0. |
| [in] | AP | device pointer storing matrix A, of dimension ( lda, m ) |
| [in] | lda | [int] specifies the leading dimension of A. lda = max( 1, m ). |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| hipblasStatus_t hipblasCtrmvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const hipblasComplex *const | AP[], | ||
| int | lda, | ||
| hipblasComplex *const | x[], | ||
| int | incx, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
trmvBatched performs one of the matrix-vector operations
x_i = A_i*x_i or x_i = A**T*x_i, 0 \le i < batchCount
where x_i is an n element vector and A_i is an n by n (unit, or non-unit, upper or lower triangular matrix)
The vectors x_i are overwritten.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A_i is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of matrices A_i. m >= 0. |
| [in] | AP | device pointer storing pointer of matrices A_i, of dimension ( lda, m ) |
| [in] | lda | [int] specifies the leading dimension of A_i. lda >= max( 1, m ). |
| [in] | x | device pointer storing vectors x_i. |
| [in] | incx | [int] specifies the increment for the elements of vectors x_i. |
| [in] | batchCount | [int] The number of batched matrices/vectors. |
| hipblasStatus_t hipblasCtrmvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
trmvStridedBatched performs one of the matrix-vector operations
x_i = A_i*x_i or x_i = A**T*x_i, 0 \le i < batchCount
where x_i is an n element vector and A_i is an n by n (unit, or non-unit, upper or lower triangular matrix) with strides specifying how to retrieve $x_i$ (resp. $A_i$) from $x_{i-1}$ (resp. $A_i$).
The vectors x_i are overwritten.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A_i is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of matrices A_i. m >= 0. |
| [in] | AP | device pointer of the matrix A_0, of dimension ( lda, m ) |
| [in] | lda | [int] specifies the leading dimension of A_i. lda >= max( 1, m ). |
| [in] | strideA | [hipblasStride] stride from the start of one A_i matrix to the next A_{i + 1} |
| [in] | x | device pointer storing the vector x_0. |
| [in] | incx | [int] specifies the increment for the elements of one vector x. |
| [in] | stridex | [hipblasStride] stride from the start of one x_i vector to the next x_{i + 1} |
| [in] | batchCount | [int] The number of batched matrices/vectors. |
| hipblasStatus_t hipblasCtrsm | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| hipblasComplex * | BP, | ||
| int | ldb | ||
| ) |
BLAS Level 3 API.
trsm solves
op(A)*X = alpha*B or X*op(A) = alpha*B,
where alpha is a scalar, X and B are m by n matrices, A is triangular matrix and op(A) is one of
op( A ) = A or op( A ) = A^T or op( A ) = A^H.
The matrix X is overwritten on B.
Note about memory allocation: When trsm is launched with a k evenly divisible by the internal block size of 128, and is no larger than 10 of these blocks, the API takes advantage of utilizing pre-allocated memory found in the handle to increase overall performance. This memory can be managed by using the environment variable WORKBUF_TRSM_B_CHNK. When this variable is not set the device memory used for temporary storage will default to 1 MB and may result in chunking, which in turn may reduce performance. Under these circumstances it is recommended that WORKBUF_TRSM_B_CHNK be set to the desired chunk of right hand sides to be used at a time.
(where k is m when HIPBLAS_SIDE_LEFT and is n when HIPBLAS_SIDE_RIGHT)
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] HIPBLAS_SIDE_LEFT: op(A)*X = alpha*B. HIPBLAS_SIDE_RIGHT: X*op(A) = alpha*B. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: op(A) = A. HIPBLAS_OP_T: op(A) = A^T. HIPBLAS_OP_C: op(A) = A^H. |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of B. m >= 0. |
| [in] | n | [int] n specifies the number of columns of B. n >= 0. |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. When alpha is &zero then A is not referenced and B need not be set before entry. |
| [in] | AP | device pointer storing matrix A. of dimension ( lda, k ), where k is m when HIPBLAS_SIDE_LEFT and is n when HIPBLAS_SIDE_RIGHT only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of A. if side = HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), if side = HIPBLAS_SIDE_RIGHT, lda >= max( 1, n ). |
| [in,out] | BP | device pointer storing matrix B. |
| [in] | ldb | [int] ldb specifies the first dimension of B. ldb >= max( 1, m ). |
| hipblasStatus_t hipblasCtrsmBatched | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex *const | AP[], | ||
| int | lda, | ||
| hipblasComplex *const | BP[], | ||
| int | ldb, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
trsmBatched performs the following batched operation:
op(A_i)*X_i = alpha*B_i or X_i*op(A_i) = alpha*B_i, for i = 1, ..., batchCount.
where alpha is a scalar, X and B are batched m by n matrices, A is triangular batched matrix and op(A) is one of
op( A ) = A or op( A ) = A^T or op( A ) = A^H.
Each matrix X_i is overwritten on B_i for i = 1, ..., batchCount.
Note about memory allocation: When trsm is launched with a k evenly divisible by the internal block size of 128, and is no larger than 10 of these blocks, the API takes advantage of utilizing pre-allocated memory found in the handle to increase overall performance. This memory can be managed by using the environment variable WORKBUF_TRSM_B_CHNK. When this variable is not set the device memory used for temporary storage will default to 1 MB and may result in chunking, which in turn may reduce performance. Under these circumstances it is recommended that WORKBUF_TRSM_B_CHNK be set to the desired chunk of right hand sides to be used at a time. (where k is m when HIPBLAS_SIDE_LEFT and is n when HIPBLAS_SIDE_RIGHT)
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] HIPBLAS_SIDE_LEFT: op(A)*X = alpha*B. HIPBLAS_SIDE_RIGHT: X*op(A) = alpha*B. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: each A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: each A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: op(A) = A. HIPBLAS_OP_T: op(A) = A^T. HIPBLAS_OP_C: op(A) = A^H. |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: each A_i is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: each A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of each B_i. m >= 0. |
| [in] | n | [int] n specifies the number of columns of each B_i. n >= 0. |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. When alpha is &zero then A is not referenced and B need not be set before entry. |
| [in] | AP | device array of device pointers storing each matrix A_i on the GPU. Matricies are of dimension ( lda, k ), where k is m when HIPBLAS_SIDE_LEFT and is n when HIPBLAS_SIDE_RIGHT only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of each A_i. if side = HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), if side = HIPBLAS_SIDE_RIGHT, lda >= max( 1, n ). |
| [in,out] | BP | device array of device pointers storing each matrix B_i on the GPU. |
| [in] | ldb | [int] ldb specifies the first dimension of each B_i. ldb >= max( 1, m ). |
| [in] | batchCount | [int] number of trsm operatons in the batch. |
| hipblasStatus_t hipblasCtrsmStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const hipblasComplex * | alpha, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| hipblasComplex * | BP, | ||
| int | ldb, | ||
| hipblasStride | strideB, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
trsmSridedBatched performs the following strided batched operation:
op(A_i)*X_i = alpha*B_i or X_i*op(A_i) = alpha*B_i, for i = 1, ..., batchCount.
where alpha is a scalar, X and B are strided batched m by n matrices, A is triangular strided batched matrix and op(A) is one of
op( A ) = A or op( A ) = A^T or op( A ) = A^H.
Each matrix X_i is overwritten on B_i for i = 1, ..., batchCount.
Note about memory allocation: When trsm is launched with a k evenly divisible by the internal block size of 128, and is no larger than 10 of these blocks, the API takes advantage of utilizing pre-allocated memory found in the handle to increase overall performance. This memory can be managed by using the environment variable WORKBUF_TRSM_B_CHNK. When this variable is not set the device memory used for temporary storage will default to 1 MB and may result in chunking, which in turn may reduce performance. Under these circumstances it is recommended that WORKBUF_TRSM_B_CHNK be set to the desired chunk of right hand sides to be used at a time. (where k is m when HIPBLAS_SIDE_LEFT and is n when HIPBLAS_SIDE_RIGHT)
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] HIPBLAS_SIDE_LEFT: op(A)*X = alpha*B. HIPBLAS_SIDE_RIGHT: X*op(A) = alpha*B. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: each A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: each A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: op(A) = A. HIPBLAS_OP_T: op(A) = A^T. HIPBLAS_OP_C: op(A) = A^H. |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: each A_i is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: each A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of each B_i. m >= 0. |
| [in] | n | [int] n specifies the number of columns of each B_i. n >= 0. |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. When alpha is &zero then A is not referenced and B need not be set before entry. |
| [in] | AP | device pointer pointing to the first matrix A_1. of dimension ( lda, k ), where k is m when HIPBLAS_SIDE_LEFT and is n when HIPBLAS_SIDE_RIGHT only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of each A_i. if side = HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), if side = HIPBLAS_SIDE_RIGHT, lda >= max( 1, n ). |
| [in] | strideA | [hipblasStride] stride from the start of one A_i matrix to the next A_(i + 1). |
| [in,out] | BP | device pointer pointing to the first matrix B_1. |
| [in] | ldb | [int] ldb specifies the first dimension of each B_i. ldb >= max( 1, m ). |
| [in] | strideB | [hipblasStride] stride from the start of one B_i matrix to the next B_(i + 1). |
| [in] | batchCount | [int] number of trsm operatons in the batch. |
| hipblasStatus_t hipblasCtrsv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| hipblasComplex * | x, | ||
| int | incx | ||
| ) |
BLAS Level 2 API.
trsv solves
A*x = b or A**T*x = b,
where x and b are vectors and A is a triangular matrix.
The vector x is overwritten on b.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of b. m >= 0. |
| [in] | AP | device pointer storing matrix A, of dimension ( lda, m ) |
| [in] | lda | [int] specifies the leading dimension of A. lda = max( 1, m ). |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| hipblasStatus_t hipblasCtrsvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const hipblasComplex *const | AP[], | ||
| int | lda, | ||
| hipblasComplex *const | x[], | ||
| int | incx, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
trsvBatched solves
A_i*x_i = b_i or A_i**T*x_i = b_i,
where (A_i, x_i, b_i) is the i-th instance of the batch. x_i and b_i are vectors and A_i is an m by m triangular matrix.
The vector x is overwritten on b.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of b. m >= 0. |
| [in] | AP | device array of device pointers storing each matrix A_i. |
| [in] | lda | [int] specifies the leading dimension of each A_i. lda = max(1, m) |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasCtrsvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
trsvStridedBatched solves
A_i*x_i = b_i or A_i**T*x_i = b_i,
where (A_i, x_i, b_i) is the i-th instance of the batch. x_i and b_i are vectors and A_i is an m by m triangular matrix, for i = 1, ..., batchCount.
The vector x is overwritten on b.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of each b_i. m >= 0. |
| [in] | AP | device pointer to the first matrix (A_1) in the batch, of dimension ( lda, m ) |
| [in] | strideA | [hipblasStride] stride from the start of one A_i matrix to the next A_(i + 1) |
| [in] | lda | [int] specifies the leading dimension of each A_i. lda = max( 1, m ). |
| [in,out] | x | device pointer to the first vector (x_1) in the batch. |
| [in] | stridex | [hipblasStride] stride from the start of one x_i vector to the next x_(i + 1) |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasCtrtri | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasDiagType_t | diag, | ||
| int | n, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| hipblasComplex * | invA, | ||
| int | ldinvA | ||
| ) |
BLAS Level 3 API.
trtri compute the inverse of a matrix A, namely, invA
and write the result into invA;
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | diag | [hipblasDiagType_t] = 'HIPBLAS_DIAG_NON_UNIT', A is non-unit triangular; = 'HIPBLAS_DIAG_UNIT', A is unit triangular; |
| [in] | n | [int] size of matrix A and invA |
| [in] | AP | device pointer storing matrix A. |
| [in] | lda | [int] specifies the leading dimension of A. |
| [out] | invA | device pointer storing matrix invA. |
| [in] | ldinvA | [int] specifies the leading dimension of invA. |
| hipblasStatus_t hipblasCtrtriBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasDiagType_t | diag, | ||
| int | n, | ||
| const hipblasComplex *const | AP[], | ||
| int | lda, | ||
| hipblasComplex * | invA[], | ||
| int | ldinvA, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
trtriBatched compute the inverse of A_i and write into invA_i where A_i and invA_i are the i-th matrices in the batch, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' |
| [in] | diag | [hipblasDiagType_t] = 'HIPBLAS_DIAG_NON_UNIT', A is non-unit triangular; = 'HIPBLAS_DIAG_UNIT', A is unit triangular; |
| [in] | n | [int] |
| [in] | AP | device array of device pointers storing each matrix A_i. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [out] | invA | device array of device pointers storing the inverse of each matrix A_i. Partial inplace operation is supported, see below. If UPLO = 'U', the leading N-by-N upper triangular part of the invA will store the inverse of the upper triangular matrix, and the strictly lower triangular part of invA is cleared. If UPLO = 'L', the leading N-by-N lower triangular part of the invA will store the inverse of the lower triangular matrix, and the strictly upper triangular part of invA is cleared. |
| [in] | ldinvA | [int] specifies the leading dimension of each invA_i. |
| [in] | batchCount | [int] numbers of matrices in the batch |
| hipblasStatus_t hipblasCtrtriStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasDiagType_t | diag, | ||
| int | n, | ||
| const hipblasComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| hipblasComplex * | invA, | ||
| int | ldinvA, | ||
| hipblasStride | stride_invA, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
trtriStridedBatched compute the inverse of A_i and write into invA_i where A_i and invA_i are the i-th matrices in the batch, for i = 1, ..., batchCount
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' |
| [in] | diag | [hipblasDiagType_t] = 'HIPBLAS_DIAG_NON_UNIT', A is non-unit triangular; = 'HIPBLAS_DIAG_UNIT', A is unit triangular; |
| [in] | n | [int] |
| [in] | AP | device pointer pointing to address of first matrix A_1. |
| [in] | lda | [int] specifies the leading dimension of each A. |
| [in] | strideA | [hipblasStride] "batch stride a": stride from the start of one A_i matrix to the next A_(i + 1). |
| [out] | invA | device pointer storing the inverses of each matrix A_i. Partial inplace operation is supported, see below. If UPLO = 'U', the leading N-by-N upper triangular part of the invA will store the inverse of the upper triangular matrix, and the strictly lower triangular part of invA is cleared. If UPLO = 'L', the leading N-by-N lower triangular part of the invA will store the inverse of the lower triangular matrix, and the strictly upper triangular part of invA is cleared. |
| [in] | ldinvA | [int] specifies the leading dimension of each invA_i. |
| [in] | stride_invA | [hipblasStride] "batch stride invA": stride from the start of one invA_i matrix to the next invA_(i + 1). |
| [in] | batchCount | [int] numbers of matrices in the batch |
| hipblasStatus_t hipblasDasum | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const double * | x, | ||
| int | incx, | ||
| double * | result | ||
| ) |
BLAS Level 1 API.
asum computes the sum of the magnitudes of elements of a real vector x, or the sum of magnitudes of the real and imaginary parts of elements if x is a complex vector.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x and y. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. incx must be > 0. |
| [in,out] | result | device pointer or host pointer to store the asum product. return is 0.0 if n <= 0. |
| hipblasStatus_t hipblasDasumBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const double *const | x[], | ||
| int | incx, | ||
| int | batchCount, | ||
| double * | result | ||
| ) |
BLAS Level 1 API.
asumBatched computes the sum of the magnitudes of the elements in a batch of real vectors x_i, or the sum of magnitudes of the real and imaginary parts of elements if x_i is a complex vector, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each vector x_i |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. incx must be > 0. |
| [in] | batchCount | [int] number of instances in the batch. |
| [out] | result | device array or host array of batchCount size for results. return is 0.0 if n, incx<=0. |
| hipblasStatus_t hipblasDasumStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const double * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount, | ||
| double * | result | ||
| ) |
BLAS Level 1 API.
asumStridedBatched computes the sum of the magnitudes of elements of a real vectors x_i, or the sum of magnitudes of the real and imaginary parts of elements if x_i is a complex vector, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each vector x_i |
| [in] | x | device pointer to the first vector x_1. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. incx must be > 0. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stride_x, however the user should take care to ensure that stride_x is of appropriate size, for a typical case this means stride_x >= n * incx. |
| [in] | batchCount | [int] number of instances in the batch |
| [out] | result | device pointer or host pointer to array for storing contiguous batchCount results. return is 0.0 if n, incx<=0. |
| hipblasStatus_t hipblasDaxpy | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const double * | alpha, | ||
| const double * | x, | ||
| int | incx, | ||
| double * | y, | ||
| int | incy | ||
| ) |
BLAS Level 1 API.
axpy computes constant alpha multiplied by vector x, plus vector y
y := alpha * x + y
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x and y. |
| [in] | alpha | device pointer or host pointer to specify the scalar alpha. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [out] | y | device pointer storing vector y. |
| [in,out] | incy | [int] specifies the increment for the elements of y. |
| hipblasStatus_t hipblasDaxpyBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const double * | alpha, | ||
| const double *const | x[], | ||
| int | incx, | ||
| double *const | y[], | ||
| int | incy, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
axpyBatched compute y := alpha * x + y over a set of batched vectors.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x and y. |
| [in] | alpha | specifies the scalar alpha. |
| [in] | x | pointer storing vector x on the GPU. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [out] | y | pointer storing vector y on the GPU. |
| [in,out] | incy | [int] specifies the increment for the elements of y. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasDaxpyStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const double * | alpha, | ||
| const double * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| double * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
axpyStridedBatched compute y := alpha * x + y over a set of strided batched vectors.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] |
| [in] | alpha | specifies the scalar alpha. |
| [in] | x | pointer storing vector x on the GPU. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | stridex | [hipblasStride] specifies the increment between vectors of x. |
| [out] | y | pointer storing vector y on the GPU. |
| [in,out] | incy | [int] specifies the increment for the elements of y. |
| [in] | stridey | [hipblasStride] specifies the increment between vectors of y. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasDcopy | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const double * | x, | ||
| int | incx, | ||
| double * | y, | ||
| int | incy | ||
| ) |
BLAS Level 1 API.
copy copies each element x[i] into y[i], for i = 1 , ... , n
y := x,
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x to be copied to y. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [out] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| hipblasStatus_t hipblasDcopyBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const double *const | x[], | ||
| int | incx, | ||
| double *const | y[], | ||
| int | incy, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
copyBatched copies each element x_i[j] into y_i[j], for j = 1 , ... , n; i = 1 , ... , batchCount
y_i := x_i,
where (x_i, y_i) is the i-th instance of the batch. x_i and y_i are vectors.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i to be copied to y_i. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each vector x_i. |
| [out] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each vector y_i. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasDcopyStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const double * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| double * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
copyStridedBatched copies each element x_i[j] into y_i[j], for j = 1 , ... , n; i = 1 , ... , batchCount
y_i := x_i,
where (x_i, y_i) is the i-th instance of the batch. x_i and y_i are vectors.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i to be copied to y_i. |
| [in] | x | device pointer to the first vector (x_1) in the batch. |
| [in] | incx | [int] specifies the increments for the elements of vectors x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stride_x, however the user should take care to ensure that stride_x is of appropriate size, for a typical case this means stride_x >= n * incx. |
| [out] | y | device pointer to the first vector (y_1) in the batch. |
| [in] | incy | [int] specifies the increment for the elements of vectors y_i. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1). There are no restrictions placed on stride_y, however the user should take care to ensure that stride_y is of appropriate size, for a typical case this means stride_y >= n * incy. stridey should be non zero. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasDdgmm | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| int | m, | ||
| int | n, | ||
| const double * | AP, | ||
| int | lda, | ||
| const double * | x, | ||
| int | incx, | ||
| double * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
dgmm performs one of the matrix-matrix operations
C = A * diag(x) if side == HIPBLAS_SIDE_RIGHT C = diag(x) * A if side == HIPBLAS_SIDE_LEFT
where C and A are m by n dimensional matrices. diag( x ) is a diagonal matrix and x is vector of dimension n if side == HIPBLAS_SIDE_RIGHT and dimension m if side == HIPBLAS_SIDE_LEFT.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] specifies the side of diag(x) |
| [in] | m | [int] matrix dimension m. |
| [in] | n | [int] matrix dimension n. |
| [in] | AP | device pointer storing matrix A. |
| [in] | lda | [int] specifies the leading dimension of A. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment between values of x |
| [in,out] | CP | device pointer storing matrix C. |
| [in] | ldc | [int] specifies the leading dimension of C. |
| hipblasStatus_t hipblasDdgmmBatched | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| int | m, | ||
| int | n, | ||
| const double *const | AP[], | ||
| int | lda, | ||
| const double *const | x[], | ||
| int | incx, | ||
| double *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
dgmmBatched performs one of the batched matrix-matrix operations
C_i = A_i * diag(x_i) for i = 0, 1, ... batchCount-1 if side == HIPBLAS_SIDE_RIGHT C_i = diag(x_i) * A_i for i = 0, 1, ... batchCount-1 if side == HIPBLAS_SIDE_LEFT
where C_i and A_i are m by n dimensional matrices. diag(x_i) is a diagonal matrix and x_i is vector of dimension n if side == HIPBLAS_SIDE_RIGHT and dimension m if side == HIPBLAS_SIDE_LEFT.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] specifies the side of diag(x) |
| [in] | m | [int] matrix dimension m. |
| [in] | n | [int] matrix dimension n. |
| [in] | AP | device array of device pointers storing each matrix A_i on the GPU. Each A_i is of dimension ( lda, n ) |
| [in] | lda | [int] specifies the leading dimension of A_i. |
| [in] | x | device array of device pointers storing each vector x_i on the GPU. Each x_i is of dimension n if side == HIPBLAS_SIDE_RIGHT and dimension m if side == HIPBLAS_SIDE_LEFT |
| [in] | incx | [int] specifies the increment between values of x_i |
| [in,out] | CP | device array of device pointers storing each matrix C_i on the GPU. Each C_i is of dimension ( ldc, n ). |
| [in] | ldc | [int] specifies the leading dimension of C_i. |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasDdgmmStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| int | m, | ||
| int | n, | ||
| const double * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const double * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| double * | CP, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
dgmmStridedBatched performs one of the batched matrix-matrix operations
C_i = A_i * diag(x_i) if side == HIPBLAS_SIDE_RIGHT for i = 0, 1, ... batchCount-1 C_i = diag(x_i) * A_i if side == HIPBLAS_SIDE_LEFT for i = 0, 1, ... batchCount-1
where C_i and A_i are m by n dimensional matrices. diag(x_i) is a diagonal matrix and x_i is vector of dimension n if side == HIPBLAS_SIDE_RIGHT and dimension m if side == HIPBLAS_SIDE_LEFT.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] specifies the side of diag(x) |
| [in] | m | [int] matrix dimension m. |
| [in] | n | [int] matrix dimension n. |
| [in] | AP | device pointer to the first matrix A_0 on the GPU. Each A_i is of dimension ( lda, n ) |
| [in] | lda | [int] specifies the leading dimension of A. |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | x | pointer to the first vector x_0 on the GPU. Each x_i is of dimension n if side == HIPBLAS_SIDE_RIGHT and dimension m if side == HIPBLAS_SIDE_LEFT |
| [in] | incx | [int] specifies the increment between values of x |
| [in] | stridex | [hipblasStride] stride from the start of one vector(x_i) and the next one (x_i+1) |
| [in,out] | CP | device pointer to the first matrix C_0 on the GPU. Each C_i is of dimension ( ldc, n ). |
| [in] | ldc | [int] specifies the leading dimension of C. |
| [in] | strideC | [hipblasStride] stride from the start of one matrix (C_i) and the next one (C_i+1) |
| [in] | batchCount | [int] number of instances i in the batch. |
| hipblasStatus_t hipblasDdot | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const double * | x, | ||
| int | incx, | ||
| const double * | y, | ||
| int | incy, | ||
| double * | result | ||
| ) |
BLAS Level 1 API.
dot(u) performs the dot product of vectors x and y
result = x * y;
dotc performs the dot product of the conjugate of complex vector x and complex vector y
result = conjugate (x) * y;
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x and y. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of y. |
| [in] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in,out] | result | device pointer or host pointer to store the dot product. return is 0.0 if n <= 0. |
| hipblasStatus_t hipblasDdotBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const double *const | x[], | ||
| int | incx, | ||
| const double *const | y[], | ||
| int | incy, | ||
| int | batchCount, | ||
| double * | result | ||
| ) |
BLAS Level 1 API.
dotBatched(u) performs a batch of dot products of vectors x and y
result_i = x_i * y_i;
dotcBatched performs a batch of dot products of the conjugate of complex vector x and complex vector y
result_i = conjugate (x_i) * y_i;
where (x_i, y_i) is the i-th instance of the batch. x_i and y_i are vectors, for i = 1, ..., batchCount
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i and y_i. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | batchCount | [int] number of instances in the batch |
| [in,out] | result | device array or host array of batchCount size to store the dot products of each batch. return 0.0 for each element if n <= 0. |
| hipblasStatus_t hipblasDdotStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const double * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const double * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount, | ||
| double * | result | ||
| ) |
BLAS Level 1 API.
dotStridedBatched(u) performs a batch of dot products of vectors x and y
result_i = x_i * y_i;
dotcStridedBatched performs a batch of dot products of the conjugate of complex vector x and complex vector y
result_i = conjugate (x_i) * y_i;
where (x_i, y_i) is the i-th instance of the batch. x_i and y_i are vectors, for i = 1, ..., batchCount
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i and y_i. |
| [in] | x | device pointer to the first vector (x_1) in the batch. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1) |
| [in] | y | device pointer to the first vector (y_1) in the batch. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1) |
| [in] | batchCount | [int] number of instances in the batch |
| [in,out] | result | device array or host array of batchCount size to store the dot products of each batch. return 0.0 for each element if n <= 0. |
| hipblasStatus_t hipblasDestroy | ( | hipblasHandle_t | handle | ) |
Destroys the library context created using hipblasCreate()
| hipblasStatus_t hipblasDgbmv | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | trans, | ||
| int | m, | ||
| int | n, | ||
| int | kl, | ||
| int | ku, | ||
| const double * | alpha, | ||
| const double * | AP, | ||
| int | lda, | ||
| const double * | x, | ||
| int | incx, | ||
| const double * | beta, | ||
| double * | y, | ||
| int | incy | ||
| ) |
BLAS Level 2 API.
gbmv performs one of the matrix-vector operations
y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y, or y := alpha*A**H*x + beta*y,
where alpha and beta are scalars, x and y are vectors and A is an m by n banded matrix with kl sub-diagonals and ku super-diagonals.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | trans | [hipblasOperation_t] indicates whether matrix A is tranposed (conjugated) or not |
| [in] | m | [int] number of rows of matrix A |
| [in] | n | [int] number of columns of matrix A |
| [in] | kl | [int] number of sub-diagonals of A |
| [in] | ku | [int] number of super-diagonals of A |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device pointer storing banded matrix A. Leading (kl + ku + 1) by n part of the matrix contains the coefficients of the banded matrix. The leading diagonal resides in row (ku + 1) with the first super-diagonal above on the RHS of row ku. The first sub-diagonal resides below on the LHS of row ku + 2. This propogates up and down across sub/super-diagonals. Ex: (m = n = 7; ku = 2, kl = 2) 1 2 3 0 0 0 0 0 0 3 3 3 3 3 4 1 2 3 0 0 0 0 2 2 2 2 2 2 5 4 1 2 3 0 0 -—> 1 1 1 1 1 1 1 0 5 4 1 2 3 0 4 4 4 4 4 4 0 0 0 5 4 1 2 0 5 5 5 5 5 0 0 0 0 0 5 4 1 2 0 0 0 0 0 0 0 0 0 0 0 5 4 1 0 0 0 0 0 0 0 Note that the empty elements which don't correspond to data will not be referenced. |
| [in] | lda | [int] specifies the leading dimension of A. Must be >= (kl + ku + 1) |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| hipblasStatus_t hipblasDgbmvBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | trans, | ||
| int | m, | ||
| int | n, | ||
| int | kl, | ||
| int | ku, | ||
| const double * | alpha, | ||
| const double *const | AP[], | ||
| int | lda, | ||
| const double *const | x[], | ||
| int | incx, | ||
| const double * | beta, | ||
| double *const | y[], | ||
| int | incy, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
gbmvBatched performs one of the matrix-vector operations
y_i := alpha*A_i*x_i + beta*y_i, or y_i := alpha*A_i**T*x_i + beta*y_i, or y_i := alpha*A_i**H*x_i + beta*y_i,
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha and beta are scalars, x_i and y_i are vectors and A_i is an m by n banded matrix with kl sub-diagonals and ku super-diagonals, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | trans | [hipblasOperation_t] indicates whether matrix A is tranposed (conjugated) or not |
| [in] | m | [int] number of rows of each matrix A_i |
| [in] | n | [int] number of columns of each matrix A_i |
| [in] | kl | [int] number of sub-diagonals of each A_i |
| [in] | ku | [int] number of super-diagonals of each A_i |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device array of device pointers storing each banded matrix A_i. Leading (kl + ku + 1) by n part of the matrix contains the coefficients of the banded matrix. The leading diagonal resides in row (ku + 1) with the first super-diagonal above on the RHS of row ku. The first sub-diagonal resides below on the LHS of row ku + 2. This propogates up and down across sub/super-diagonals. Ex: (m = n = 7; ku = 2, kl = 2) 1 2 3 0 0 0 0 0 0 3 3 3 3 3 4 1 2 3 0 0 0 0 2 2 2 2 2 2 5 4 1 2 3 0 0 -—> 1 1 1 1 1 1 1 0 5 4 1 2 3 0 4 4 4 4 4 4 0 0 0 5 4 1 2 0 5 5 5 5 5 0 0 0 0 0 5 4 1 2 0 0 0 0 0 0 0 0 0 0 0 5 4 1 0 0 0 0 0 0 0 Note that the empty elements which don't correspond to data will not be referenced. |
| [in] | lda | [int] specifies the leading dimension of each A_i. Must be >= (kl + ku + 1) |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | batchCount | [int] specifies the number of instances in the batch. |
| hipblasStatus_t hipblasDgbmvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | trans, | ||
| int | m, | ||
| int | n, | ||
| int | kl, | ||
| int | ku, | ||
| const double * | alpha, | ||
| const double * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const double * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const double * | beta, | ||
| double * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
gbmvStridedBatched performs one of the matrix-vector operations
y_i := alpha*A_i*x_i + beta*y_i, or y_i := alpha*A_i**T*x_i + beta*y_i, or y_i := alpha*A_i**H*x_i + beta*y_i,
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha and beta are scalars, x_i and y_i are vectors and A_i is an m by n banded matrix with kl sub-diagonals and ku super-diagonals, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | trans | [hipblasOperation_t] indicates whether matrix A is tranposed (conjugated) or not |
| [in] | m | [int] number of rows of matrix A |
| [in] | n | [int] number of columns of matrix A |
| [in] | kl | [int] number of sub-diagonals of A |
| [in] | ku | [int] number of super-diagonals of A |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device pointer to first banded matrix (A_1). Leading (kl + ku + 1) by n part of the matrix contains the coefficients of the banded matrix. The leading diagonal resides in row (ku + 1) with the first super-diagonal above on the RHS of row ku. The first sub-diagonal resides below on the LHS of row ku + 2. This propogates up and down across sub/super-diagonals. Ex: (m = n = 7; ku = 2, kl = 2) 1 2 3 0 0 0 0 0 0 3 3 3 3 3 4 1 2 3 0 0 0 0 2 2 2 2 2 2 5 4 1 2 3 0 0 -—> 1 1 1 1 1 1 1 0 5 4 1 2 3 0 4 4 4 4 4 4 0 0 0 5 4 1 2 0 5 5 5 5 5 0 0 0 0 0 5 4 1 2 0 0 0 0 0 0 0 0 0 0 0 5 4 1 0 0 0 0 0 0 0 Note that the empty elements which don't correspond to data will not be referenced. |
| [in] | lda | [int] specifies the leading dimension of A. Must be >= (kl + ku + 1) |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | x | device pointer to first vector (x_1). |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1) |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device pointer to first vector (y_1). |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (x_i+1) |
| [in] | batchCount | [int] specifies the number of instances in the batch. |
| hipblasStatus_t hipblasDgeam | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | transA, | ||
| hipblasOperation_t | transB, | ||
| int | m, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double * | AP, | ||
| int | lda, | ||
| const double * | beta, | ||
| const double * | BP, | ||
| int | ldb, | ||
| double * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
geam performs one of the matrix-matrix operations
C = alpha*op( A ) + beta*op( B ),
where op( X ) is one of
op( X ) = X or op( X ) = X**T or op( X ) = X**H,
alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by n matrix, op( B ) an m by n matrix, and C an m by n matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | transA | [hipblasOperation_t] specifies the form of op( A ) |
| [in] | transB | [hipblasOperation_t] specifies the form of op( B ) |
| [in] | m | [int] matrix dimension m. |
| [in] | n | [int] matrix dimension n. |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. |
| [in] | AP | device pointer storing matrix A. |
| [in] | lda | [int] specifies the leading dimension of A. |
| [in] | beta | device pointer or host pointer specifying the scalar beta. |
| [in] | BP | device pointer storing matrix B. |
| [in] | ldb | [int] specifies the leading dimension of B. |
| [in,out] | CP | device pointer storing matrix C. |
| [in] | ldc | [int] specifies the leading dimension of C. |
| hipblasStatus_t hipblasDgeamBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | transA, | ||
| hipblasOperation_t | transB, | ||
| int | m, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double *const | AP[], | ||
| int | lda, | ||
| const double * | beta, | ||
| const double *const | BP[], | ||
| int | ldb, | ||
| double *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
geamBatched performs one of the batched matrix-matrix operations
C_i = alpha*op( A_i ) + beta*op( B_i ) for i = 0, 1, ... batchCount - 1
where alpha and beta are scalars, and op(A_i), op(B_i) and C_i are m by n matrices and op( X ) is one of
op( X ) = X or op( X ) = X**T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | transA | [hipblasOperation_t] specifies the form of op( A ) |
| [in] | transB | [hipblasOperation_t] specifies the form of op( B ) |
| [in] | m | [int] matrix dimension m. |
| [in] | n | [int] matrix dimension n. |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. |
| [in] | AP | device array of device pointers storing each matrix A_i on the GPU. Each A_i is of dimension ( lda, k ), where k is m when transA == HIPBLAS_OP_N and is n when transA == HIPBLAS_OP_T. |
| [in] | lda | [int] specifies the leading dimension of A. |
| [in] | beta | device pointer or host pointer specifying the scalar beta. |
| [in] | BP | device array of device pointers storing each matrix B_i on the GPU. Each B_i is of dimension ( ldb, k ), where k is m when transB == HIPBLAS_OP_N and is n when transB == HIPBLAS_OP_T. |
| [in] | ldb | [int] specifies the leading dimension of B. |
| [in,out] | CP | device array of device pointers storing each matrix C_i on the GPU. Each C_i is of dimension ( ldc, n ). |
| [in] | ldc | [int] specifies the leading dimension of C. |
| [in] | batchCount | [int] number of instances i in the batch. |
| hipblasStatus_t hipblasDgeamStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | transA, | ||
| hipblasOperation_t | transB, | ||
| int | m, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const double * | beta, | ||
| const double * | BP, | ||
| int | ldb, | ||
| hipblasStride | strideB, | ||
| double * | CP, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
geamStridedBatched performs one of the batched matrix-matrix operations
C_i = alpha*op( A_i ) + beta*op( B_i ) for i = 0, 1, ... batchCount - 1
where alpha and beta are scalars, and op(A_i), op(B_i) and C_i are m by n matrices and op( X ) is one of
op( X ) = X or op( X ) = X**T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | transA | [hipblasOperation_t] specifies the form of op( A ) |
| [in] | transB | [hipblasOperation_t] specifies the form of op( B ) |
| [in] | m | [int] matrix dimension m. |
| [in] | n | [int] matrix dimension n. |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. |
| [in] | AP | device pointer to the first matrix A_0 on the GPU. Each A_i is of dimension ( lda, k ), where k is m when transA == HIPBLAS_OP_N and is n when transA == HIPBLAS_OP_T. |
| [in] | lda | [int] specifies the leading dimension of A. |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | beta | device pointer or host pointer specifying the scalar beta. |
| [in] | BP | pointer to the first matrix B_0 on the GPU. Each B_i is of dimension ( ldb, k ), where k is m when transB == HIPBLAS_OP_N and is n when transB == HIPBLAS_OP_T. |
| [in] | ldb | [int] specifies the leading dimension of B. |
| [in] | strideB | [hipblasStride] stride from the start of one matrix (B_i) and the next one (B_i+1) |
| [in,out] | CP | pointer to the first matrix C_0 on the GPU. Each C_i is of dimension ( ldc, n ). |
| [in] | ldc | [int] specifies the leading dimension of C. |
| [in] | strideC | [hipblasStride] stride from the start of one matrix (C_i) and the next one (C_i+1) |
| [in] | batchCount | [int] number of instances i in the batch. |
| hipblasStatus_t hipblasDgels | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | trans, | ||
| const int | m, | ||
| const int | n, | ||
| const int | nrhs, | ||
| double * | A, | ||
| const int | lda, | ||
| double * | B, | ||
| const int | ldb, | ||
| int * | info, | ||
| int * | deviceInfo | ||
| ) |
GELS solves an overdetermined (or underdetermined) linear system defined by an m-by-n matrix A, and a corresponding matrix B, using the QR factorization computed by GEQRF (or the LQ factorization computed by "GELQF").
Depending on the value of trans, the problem solved by this function is either of the form
\[ \begin{array}{cl} A X = B & \: \text{not transposed, or}\\ A' X = B & \: \text{transposed if real, or conjugate transposed if complex} \end{array} \]
If m >= n (or m < n in the case of transpose/conjugate transpose), the system is overdetermined and a least-squares solution approximating X is found by minimizing
\[ || B - A X || \quad \text{(or} \: || B - A' X ||\text{)} \]
If m < n (or m >= n in the case of transpose/conjugate transpose), the system is underdetermined and a unique solution for X is chosen such that \(|| X ||\) is minimal.
| [in] | handle | hipblasHandle_t. |
| [in] | trans | hipblasOperation_t. Specifies the form of the system of equations. |
| [in] | m | int. m >= 0. The number of rows of matrix A. |
| [in] | n | int. n >= 0. The number of columns of matrix A. |
| [in] | nrhs | int. nrhs >= 0. The number of columns of matrices B and X; i.e., the columns on the right hand side. |
| [in,out] | A | pointer to type. Array on the GPU of dimension lda*n. On entry, the matrix A. On exit, the QR (or LQ) factorization of A as returned by "GEQRF" (or "GELQF"). |
| [in] | lda | int. lda >= m. Specifies the leading dimension of matrix A. |
| [in,out] | B | pointer to type. Array on the GPU of dimension ldb*nrhs. On entry, the matrix B. On exit, when info = 0, B is overwritten by the solution vectors (and the residuals in the overdetermined cases) stored as columns. |
| [in] | ldb | int. ldb >= max(m,n). Specifies the leading dimension of matrix B. |
| [out] | info | pointer to an int on the host. If info = 0, successful exit. If info = j < 0, the argument at position -j is invalid. |
| [out] | deviceInfo | pointer to int on the GPU. If info = 0, successful exit. If info = i > 0, the solution could not be computed because input matrix A is rank deficient; the i-th diagonal element of its triangular factor is zero. |
| hipblasStatus_t hipblasDgelsBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | trans, | ||
| const int | m, | ||
| const int | n, | ||
| const int | nrhs, | ||
| double *const | A[], | ||
| const int | lda, | ||
| double *const | B[], | ||
| const int | ldb, | ||
| int * | info, | ||
| int * | deviceInfo, | ||
| const int | batchCount | ||
| ) |
gelsBatched solves a batch of overdetermined (or underdetermined) linear systems defined by a set of m-by-n matrices \(A_j\), and corresponding matrices \(B_j\), using the QR factorizations computed by "GEQRF_BATCHED" (or the LQ factorizations computed by "GELQF_BATCHED").
For each instance in the batch, depending on the value of trans, the problem solved by this function is either of the form
\[ \begin{array}{cl} A_j X_j = B_j & \: \text{not transposed, or}\\ A_j' X_j = B_j & \: \text{transposed if real, or conjugate transposed if complex} \end{array} \]
If m >= n (or m < n in the case of transpose/conjugate transpose), the system is overdetermined and a least-squares solution approximating X_j is found by minimizing
\[ || B_j - A_j X_j || \quad \text{(or} \: || B_j - A_j' X_j ||\text{)} \]
If m < n (or m >= n in the case of transpose/conjugate transpose), the system is underdetermined and a unique solution for X_j is chosen such that \(|| X_j ||\) is minimal.
| [in] | handle | hipblasHandle_t. |
| [in] | trans | hipblasOperation_t. Specifies the form of the system of equations. |
| [in] | m | int. m >= 0. The number of rows of all matrices A_j in the batch. |
| [in] | n | int. n >= 0. The number of columns of all matrices A_j in the batch. |
| [in] | nrhs | int. nrhs >= 0. The number of columns of all matrices B_j and X_j in the batch; i.e., the columns on the right hand side. |
| [in,out] | A | array of pointer to type. Each pointer points to an array on the GPU of dimension lda*n. On entry, the matrices A_j. On exit, the QR (or LQ) factorizations of A_j as returned by "GEQRF_BATCHED" (or "GELQF_BATCHED"). |
| [in] | lda | int. lda >= m. Specifies the leading dimension of matrices A_j. |
| [in,out] | B | array of pointer to type. Each pointer points to an array on the GPU of dimension ldb*nrhs. On entry, the matrices B_j. On exit, when info[j] = 0, B_j is overwritten by the solution vectors (and the residuals in the overdetermined cases) stored as columns. |
| [in] | ldb | int. ldb >= max(m,n). Specifies the leading dimension of matrices B_j. |
| [out] | info | pointer to an int on the host. If info = 0, successful exit. If info = j < 0, the argument at position -j is invalid. |
| [out] | deviceInfo | pointer to int. Array of batchCount integers on the GPU. If deviceInfo[j] = 0, successful exit for solution of A_j. If deviceInfo[j] = i > 0, the solution of A_j could not be computed because input matrix A_j is rank deficient; the i-th diagonal element of its triangular factor is zero. |
| [in] | batchCount | int. batchCount >= 0. Number of matrices in the batch. |
| hipblasStatus_t hipblasDgelsStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | trans, | ||
| const int | m, | ||
| const int | n, | ||
| const int | nrhs, | ||
| double * | A, | ||
| const int | lda, | ||
| const hipblasStride | strideA, | ||
| double * | B, | ||
| const int | ldb, | ||
| const hipblasStride | strideB, | ||
| int * | info, | ||
| int * | deviceInfo, | ||
| const int | batch_count | ||
| ) |
gelsStridedBatched solves a batch of overdetermined (or underdetermined) linear systems defined by a set of m-by-n matrices \(A_j\), and corresponding matrices \(B_j\), using the QR factorizations computed by "GEQRF_STRIDED_BATCHED" (or the LQ factorizations computed by "GELQF_STRIDED_BATCHED").
For each instance in the batch, depending on the value of trans, the problem solved by this function is either of the form
\[ \begin{array}{cl} A_j X_j = B_j & \: \text{not transposed, or}\\ A_j' X_j = B_j & \: \text{transposed if real, or conjugate transposed if complex} \end{array} \]
If m >= n (or m < n in the case of transpose/conjugate transpose), the system is overdetermined and a least-squares solution approximating X_j is found by minimizing
\[ || B_j - A_j X_j || \quad \text{(or} \: || B_j - A_j' X_j ||\text{)} \]
If m < n (or m >= n in the case of transpose/conjugate transpose), the system is underdetermined and a unique solution for X_j is chosen such that \(|| X_j ||\) is minimal.
| [in] | handle | hipblasHandle_t. |
| [in] | trans | hipblasOperation_t. Specifies the form of the system of equations. |
| [in] | m | int. m >= 0. The number of rows of all matrices A_j in the batch. |
| [in] | n | int. n >= 0. The number of columns of all matrices A_j in the batch. |
| [in] | nrhs | int. nrhs >= 0. The number of columns of all matrices B_j and X_j in the batch; i.e., the columns on the right hand side. |
| [in,out] | A | pointer to type. Array on the GPU (the size depends on the value of strideA). On entry, the matrices A_j. On exit, the QR (or LQ) factorizations of A_j as returned by "GEQRF_STRIDED_BATCHED" (or "GELQF_STRIDED_BATCHED"). |
| [in] | lda | int. lda >= m. Specifies the leading dimension of matrices A_j. |
| [in] | strideA | hipblasStride. Stride from the start of one matrix A_j to the next one A_(j+1). There is no restriction for the value of strideA. Normal use case is strideA >= lda*n |
| [in,out] | B | pointer to type. Array on the GPU (the size depends on the value of strideB). On entry, the matrices B_j. On exit, when info[j] = 0, each B_j is overwritten by the solution vectors (and the residuals in the overdetermined cases) stored as columns. |
| [in] | ldb | int. ldb >= max(m,n). Specifies the leading dimension of matrices B_j. |
| [in] | strideB | hipblasStride. Stride from the start of one matrix B_j to the next one B_(j+1). There is no restriction for the value of strideB. Normal use case is strideB >= ldb*nrhs |
| [out] | info | pointer to an int on the host. If info = 0, successful exit. If info = j < 0, the argument at position -j is invalid. |
| [out] | deviceInfo | pointer to int. Array of batchCount integers on the GPU. If deviceInfo[j] = 0, successful exit for solution of A_j. If deviceInfo[j] = i > 0, the solution of A_j could not be computed because input matrix A_j is rank deficient; the i-th diagonal element of its triangular factor is zero. |
| [in] | batchCount | int. batchCount >= 0. Number of matrices in the batch. |
| hipblasStatus_t hipblasDgemm | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | transA, | ||
| hipblasOperation_t | transB, | ||
| int | m, | ||
| int | n, | ||
| int | k, | ||
| const double * | alpha, | ||
| const double * | AP, | ||
| int | lda, | ||
| const double * | BP, | ||
| int | ldb, | ||
| const double * | beta, | ||
| double * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
gemm performs one of the matrix-matrix operations
C = alpha*op( A )*op( B ) + beta*C,
where op( X ) is one of
op( X ) = X or op( X ) = X**T or op( X ) = X**H,
alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
| [in] | handle | [hipblasHandle_t] . |
| [in] | transA | [hipblasOperation_t] specifies the form of op( A ) |
| [in] | transB | [hipblasOperation_t] specifies the form of op( B ) |
| [in] | m | [int] number or rows of matrices op( A ) and C |
| [in] | n | [int] number of columns of matrices op( B ) and C |
| [in] | k | [int] number of columns of matrix op( A ) and number of rows of matrix op( B ) |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. |
| [in] | AP | device pointer storing matrix A. |
| [in] | lda | [int] specifies the leading dimension of A. |
| [in] | BP | device pointer storing matrix B. |
| [in] | ldb | [int] specifies the leading dimension of B. |
| [in] | beta | device pointer or host pointer specifying the scalar beta. |
| [in,out] | CP | device pointer storing matrix C on the GPU. |
| [in] | ldc | [int] specifies the leading dimension of C. |
| hipblasStatus_t hipblasDgemmBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | transA, | ||
| hipblasOperation_t | transB, | ||
| int | m, | ||
| int | n, | ||
| int | k, | ||
| const double * | alpha, | ||
| const double *const | AP[], | ||
| int | lda, | ||
| const double *const | BP[], | ||
| int | ldb, | ||
| const double * | beta, | ||
| double *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
gemmBatched performs one of the batched matrix-matrix operations C_i = alpha*op( A_i )*op( B_i ) + beta*C_i, for i = 1, ..., batchCount. where op( X ) is one of op( X ) = X or op( X ) = X**T or op( X ) = X**H, alpha and beta are scalars, and A, B and C are strided batched matrices, with op( A ) an m by k by batchCount strided_batched matrix, op( B ) an k by n by batchCount strided_batched matrix and C an m by n by batchCount strided_batched matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | transA | [hipblasOperation_t] specifies the form of op( A ) |
| [in] | transB | [hipblasOperation_t] specifies the form of op( B ) |
| [in] | m | [int] matrix dimention m. |
| [in] | n | [int] matrix dimention n. |
| [in] | k | [int] matrix dimention k. |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. |
| [in] | AP | device array of device pointers storing each matrix A_i. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | BP | device array of device pointers storing each matrix B_i. |
| [in] | ldb | [int] specifies the leading dimension of each B_i. |
| [in] | beta | device pointer or host pointer specifying the scalar beta. |
| [in,out] | CP | device array of device pointers storing each matrix C_i. |
| [in] | ldc | [int] specifies the leading dimension of each C_i. |
| [in] | batchCount | [int] number of gemm operations in the batch |
| hipblasStatus_t hipblasDgemmStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | transA, | ||
| hipblasOperation_t | transB, | ||
| int | m, | ||
| int | n, | ||
| int | k, | ||
| const double * | alpha, | ||
| const double * | AP, | ||
| int | lda, | ||
| long long | strideA, | ||
| const double * | BP, | ||
| int | ldb, | ||
| long long | strideB, | ||
| const double * | beta, | ||
| double * | CP, | ||
| int | ldc, | ||
| long long | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
gemmStridedBatched performs one of the strided batched matrix-matrix operations
C_i = alpha*op( A_i )*op( B_i ) + beta*C_i, for i = 1, ..., batchCount.
where op( X ) is one of
op( X ) = X or op( X ) = X**T or op( X ) = X**H,
alpha and beta are scalars, and A, B and C are strided batched matrices, with op( A ) an m by k by batchCount strided_batched matrix, op( B ) an k by n by batchCount strided_batched matrix and C an m by n by batchCount strided_batched matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | transA | [hipblasOperation_t] specifies the form of op( A ) |
| [in] | transB | [hipblasOperation_t] specifies the form of op( B ) |
| [in] | m | [int] matrix dimention m. |
| [in] | n | [int] matrix dimention n. |
| [in] | k | [int] matrix dimention k. |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. |
| [in] | AP | device pointer pointing to the first matrix A_1. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | strideA | [hipblasStride] stride from the start of one A_i matrix to the next A_(i + 1). |
| [in] | BP | device pointer pointing to the first matrix B_1. |
| [in] | ldb | [int] specifies the leading dimension of each B_i. |
| [in] | strideB | [hipblasStride] stride from the start of one B_i matrix to the next B_(i + 1). |
| [in] | beta | device pointer or host pointer specifying the scalar beta. |
| [in,out] | CP | device pointer pointing to the first matrix C_1. |
| [in] | ldc | [int] specifies the leading dimension of each C_i. |
| [in] | strideC | [hipblasStride] stride from the start of one C_i matrix to the next C_(i + 1). |
| [in] | batchCount | [int] number of gemm operatons in the batch |
| hipblasStatus_t hipblasDgemv | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | trans, | ||
| int | m, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double * | AP, | ||
| int | lda, | ||
| const double * | x, | ||
| int | incx, | ||
| const double * | beta, | ||
| double * | y, | ||
| int | incy | ||
| ) |
BLAS Level 2 API.
gemv performs one of the matrix-vector operations
y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y, or y := alpha*A**H*x + beta*y,
where alpha and beta are scalars, x and y are vectors and A is an m by n matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | trans | [hipblasOperation_t] indicates whether matrix A is tranposed (conjugated) or not |
| [in] | m | [int] number of rows of matrix A |
| [in] | n | [int] number of columns of matrix A |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device pointer storing matrix A. |
| [in] | lda | [int] specifies the leading dimension of A. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| hipblasStatus_t hipblasDgemvBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | trans, | ||
| int | m, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double *const | AP[], | ||
| int | lda, | ||
| const double *const | x[], | ||
| int | incx, | ||
| const double * | beta, | ||
| double *const | y[], | ||
| int | incy, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
gemvBatched performs a batch of matrix-vector operations
y_i := alpha*A_i*x_i + beta*y_i, or y_i := alpha*A_i**T*x_i + beta*y_i, or y_i := alpha*A_i**H*x_i + beta*y_i,
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha and beta are scalars, x_i and y_i are vectors and A_i is an m by n matrix, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | trans | [hipblasOperation_t] indicates whether matrices A_i are tranposed (conjugated) or not |
| [in] | m | [int] number of rows of each matrix A_i |
| [in] | n | [int] number of columns of each matrix A_i |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device array of device pointers storing each matrix A_i. |
| [in] | lda | [int] specifies the leading dimension of each matrix A_i. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each vector x_i. |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each vector y_i. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasDgemvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | transA, | ||
| int | m, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const double * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const double * | beta, | ||
| double * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
gemvStridedBatched performs a batch of matrix-vector operations
y_i := alpha*A_i*x_i + beta*y_i, or y_i := alpha*A_i**T*x_i + beta*y_i, or y_i := alpha*A_i**H*x_i + beta*y_i,
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha and beta are scalars, x_i and y_i are vectors and A_i is an m by n matrix, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | transA | [hipblasOperation_t] indicates whether matrices A_i are tranposed (conjugated) or not |
| [in] | m | [int] number of rows of matrices A_i |
| [in] | n | [int] number of columns of matrices A_i |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device pointer to the first matrix (A_1) in the batch. |
| [in] | lda | [int] specifies the leading dimension of matrices A_i. |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | x | device pointer to the first vector (x_1) in the batch. |
| [in] | incx | [int] specifies the increment for the elements of vectors x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stridex, however the user should take care to ensure that stridex is of appropriate size. When trans equals HIPBLAS_OP_N this typically means stridex >= n * incx, otherwise stridex >= m * incx. |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device pointer to the first vector (y_1) in the batch. |
| [in] | incy | [int] specifies the increment for the elements of vectors y_i. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1). There are no restrictions placed on stridey, however the user should take care to ensure that stridey is of appropriate size. When trans equals HIPBLAS_OP_N this typically means stridey >= m * incy, otherwise stridey >= n * incy. stridey should be non zero. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasDgeqrf | ( | hipblasHandle_t | handle, |
| const int | m, | ||
| const int | n, | ||
| double * | A, | ||
| const int | lda, | ||
| double * | ipiv, | ||
| int * | info | ||
| ) |
SOLVER API.
geqrf computes a QR factorization of a general m-by-n matrix A.
The factorization has the form
\[ A = Q\left[\begin{array}{c} R\\ 0 \end{array}\right] \]
where R is upper triangular (upper trapezoidal if m < n), and Q is a m-by-m orthogonal/unitary matrix represented as the product of Householder matrices
\[ Q = H_1H_2\cdots H_k, \quad \text{with} \: k = \text{min}(m,n) \]
Each Householder matrix \(H_i\) is given by
\[ H_i = I - \text{ipiv}[i] \cdot v_i v_i' \]
where the first i-1 elements of the Householder vector \(v_i\) are zero, and \(v_i[i] = 1\).
| [in] | handle | hipblasHandle_t. |
| [in] | m | int. m >= 0. The number of rows of the matrix A. |
| [in] | n | int. n >= 0. The number of columns of the matrix A. |
| [in,out] | A | pointer to type. Array on the GPU of dimension lda*n. On entry, the m-by-n matrix to be factored. On exit, the elements on and above the diagonal contain the factor R; the elements below the diagonal are the last m - i elements of Householder vector v_i. |
| [in] | lda | int. lda >= m. Specifies the leading dimension of A. |
| [out] | ipiv | pointer to type. Array on the GPU of dimension min(m,n). The Householder scalars. |
| [out] | info | pointer to a int on the host. If info = 0, successful exit. If info = j < 0, the argument at position -j is invalid. |
| hipblasStatus_t hipblasDgeqrfBatched | ( | hipblasHandle_t | handle, |
| const int | m, | ||
| const int | n, | ||
| double *const | A[], | ||
| const int | lda, | ||
| double *const | ipiv[], | ||
| int * | info, | ||
| const int | batchCount | ||
| ) |
SOLVER API.
geqrfBatched computes the QR factorization of a batch of general m-by-n matrices.
The factorization of matrix \(A_i\) in the batch has the form
\[ A_i = Q_i\left[\begin{array}{c} R_i\\ 0 \end{array}\right] \]
where \(R_i\) is upper triangular (upper trapezoidal if m < n), and \(Q_i\) is a m-by-m orthogonal/unitary matrix represented as the product of Householder matrices
\[ Q_i = H_{i_1}H_{i_2}\cdots H_{i_k}, \quad \text{with} \: k = \text{min}(m,n) \]
Each Householder matrix \(H_{i_j}\) is given by
\[ H_{i_j} = I - \text{ipiv}_i[j] \cdot v_{i_j} v_{i_j}' \]
where the first j-1 elements of Householder vector \(v_{i_j}\) are zero, and \(v_{i_j}[j] = 1\).
| [in] | handle | hipblasHandle_t. |
| [in] | m | int. m >= 0. The number of rows of all the matrices A_i in the batch. |
| [in] | n | int. n >= 0. The number of columns of all the matrices A_i in the batch. |
| [in,out] | A | Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n. On entry, the m-by-n matrices A_i to be factored. On exit, the elements on and above the diagonal contain the factor R_i. The elements below the diagonal are the last m - j elements of Householder vector v_(i_j). |
| [in] | lda | int. lda >= m. Specifies the leading dimension of matrices A_i. |
| [out] | ipiv | array of pointers to type. Each pointer points to an array on the GPU of dimension min(m, n). Contains the vectors ipiv_i of corresponding Householder scalars. |
| [out] | info | pointer to a int on the host. If info = 0, successful exit. If info = j < 0, the argument at position -j is invalid. |
| [in] | batchCount | int. batchCount >= 0. Number of matrices in the batch. |
| hipblasStatus_t hipblasDgeqrfStridedBatched | ( | hipblasHandle_t | handle, |
| const int | m, | ||
| const int | n, | ||
| double * | A, | ||
| const int | lda, | ||
| const hipblasStride | strideA, | ||
| double * | ipiv, | ||
| const hipblasStride | strideP, | ||
| int * | info, | ||
| const int | batchCount | ||
| ) |
SOLVER API.
geqrfStridedBatched computes the QR factorization of a batch of general m-by-n matrices.
The factorization of matrix \(A_i\) in the batch has the form
\[ A_i = Q_i\left[\begin{array}{c} R_i\\ 0 \end{array}\right] \]
where \(R_i\) is upper triangular (upper trapezoidal if m < n), and \(Q_i\) is a m-by-m orthogonal/unitary matrix represented as the product of Householder matrices
\[ Q_i = H_{i_1}H_{i_2}\cdots H_{i_k}, \quad \text{with} \: k = \text{min}(m,n) \]
Each Householder matrix \(H_{i_j}\) is given by
\[ H_{i_j} = I - \text{ipiv}_j[j] \cdot v_{i_j} v_{i_j}' \]
where the first j-1 elements of Householder vector \(v_{i_j}\) are zero, and \(v_{i_j}[j] = 1\).
| [in] | handle | hipblasHandle_t. |
| [in] | m | int. m >= 0. The number of rows of all the matrices A_i in the batch. |
| [in] | n | int. n >= 0. The number of columns of all the matrices A_i in the batch. |
| [in,out] | A | pointer to type. Array on the GPU (the size depends on the value of strideA). On entry, the m-by-n matrices A_i to be factored. On exit, the elements on and above the diagonal contain the factor R_i. The elements below the diagonal are the last m - j elements of Householder vector v_(i_j). |
| [in] | lda | int. lda >= m. Specifies the leading dimension of matrices A_i. |
| [in] | strideA | hipblasStride. Stride from the start of one matrix A_i to the next one A_(i+1). There is no restriction for the value of strideA. Normal use case is strideA >= lda*n. |
| [out] | ipiv | pointer to type. Array on the GPU (the size depends on the value of strideP). Contains the vectors ipiv_i of corresponding Householder scalars. |
| [in] | strideP | hipblasStride. Stride from the start of one vector ipiv_i to the next one ipiv_(i+1). There is no restriction for the value of strideP. Normal use is strideP >= min(m,n). |
| [out] | info | pointer to a int on the host. If info = 0, successful exit. If info = j < 0, the argument at position -j is invalid. |
| [in] | batchCount | int. batchCount >= 0. Number of matrices in the batch. |
| hipblasStatus_t hipblasDger | ( | hipblasHandle_t | handle, |
| int | m, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double * | x, | ||
| int | incx, | ||
| const double * | y, | ||
| int | incy, | ||
| double * | AP, | ||
| int | lda | ||
| ) |
BLAS Level 2 API.
ger,geru,gerc performs the matrix-vector operations
A := A + alpha*x*y**T , OR A := A + alpha*x*y**H for gerc
where alpha is a scalar, x and y are vectors, and A is an m by n matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | m | [int] the number of rows of the matrix A. |
| [in] | n | [int] the number of columns of the matrix A. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in,out] | AP | device pointer storing matrix A. |
| [in] | lda | [int] specifies the leading dimension of A. |
| hipblasStatus_t hipblasDgerBatched | ( | hipblasHandle_t | handle, |
| int | m, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double *const | x[], | ||
| int | incx, | ||
| const double *const | y[], | ||
| int | incy, | ||
| double *const | AP[], | ||
| int | lda, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
gerBatched,geruBatched,gercBatched performs a batch of the matrix-vector operations
A := A + alpha*x*y**T , OR A := A + alpha*x*y**H for gerc
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha is a scalar, x_i and y_i are vectors and A_i is an m by n matrix, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | m | [int] the number of rows of each matrix A_i. |
| [in] | n | [int] the number of columns of eaceh matrix A_i. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each vector x_i. |
| [in] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each vector y_i. |
| [in,out] | AP | device array of device pointers storing each matrix A_i. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasDgerStridedBatched | ( | hipblasHandle_t | handle, |
| int | m, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const double * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| double * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
gerStridedBatched,geruStridedBatched,gercStridedBatched performs the matrix-vector operations
A_i := A_i + alpha*x_i*y_i**T, OR A_i := A_i + alpha*x_i*y_i**H for gerc
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha is a scalar, x_i and y_i are vectors and A_i is an m by n matrix, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | m | [int] the number of rows of each matrix A_i. |
| [in] | n | [int] the number of columns of each matrix A_i. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer to the first vector (x_1) in the batch. |
| [in] | incx | [int] specifies the increments for the elements of each vector x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stridex, however the user should take care to ensure that stridex is of appropriate size, for a typical case this means stridex >= m * incx. |
| [in,out] | y | device pointer to the first vector (y_1) in the batch. |
| [in] | incy | [int] specifies the increment for the elements of each vector y_i. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1). There are no restrictions placed on stridey, however the user should take care to ensure that stridey is of appropriate size, for a typical case this means stridey >= n * incy. |
| [in,out] | AP | device pointer to the first matrix (A_1) in the batch. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasDgetrf | ( | hipblasHandle_t | handle, |
| const int | n, | ||
| double * | A, | ||
| const int | lda, | ||
| int * | ipiv, | ||
| int * | info | ||
| ) |
SOLVER API.
getrf computes the LU factorization of a general n-by-n matrix A using partial pivoting with row interchanges. The LU factorization can be done without pivoting if ipiv is passed as a nullptr.
In the case that ipiv is not null, the factorization has the form:
\[ A = PLU \]
where P is a permutation matrix, L is lower triangular with unit diagonal elements, and U is upper triangular.
In the case that ipiv is null, the factorization is done without pivoting:
\[ A = LU \]
| [in] | handle | hipblasHandle_t. |
| [in] | n | int. n >= 0. The number of columns and rows of the matrix A. |
| [in,out] | A | pointer to type. Array on the GPU of dimension lda*n. On entry, the n-by-n matrix A to be factored. On exit, the factors L and U from the factorization. The unit diagonal elements of L are not stored. |
| [in] | lda | int. lda >= n. Specifies the leading dimension of A. |
| [out] | ipiv | pointer to int. Array on the GPU of dimension n. The vector of pivot indices. Elements of ipiv are 1-based indices. For 1 <= i <= n, the row i of the matrix was interchanged with row ipiv[i]. Matrix P of the factorization can be derived from ipiv. The factorization here can be done without pivoting if ipiv is passed in as a nullptr. |
| [out] | info | pointer to a int on the GPU. If info = 0, successful exit. If info = j > 0, U is singular. U[j,j] is the first zero pivot. |
| hipblasStatus_t hipblasDgetrfBatched | ( | hipblasHandle_t | handle, |
| const int | n, | ||
| double *const | A[], | ||
| const int | lda, | ||
| int * | ipiv, | ||
| int * | info, | ||
| const int | batchCount | ||
| ) |
SOLVER API.
getrfBatched computes the LU factorization of a batch of general n-by-n matrices using partial pivoting with row interchanges. The LU factorization can be done without pivoting if ipiv is passed as a nullptr.
In the case that ipiv is not null, the factorization of matrix \(A_i\) in the batch has the form:
\[ A_i = P_iL_iU_i \]
where \(P_i\) is a permutation matrix, \(L_i\) is lower triangular with unit diagonal elements, and \(U_i\) is upper triangular.
In the case that ipiv is null, the factorization is done without pivoting:
\[ A_i = L_iU_i \]
| [in] | handle | hipblasHandle_t. |
| [in] | n | int. n >= 0. The number of columns and rows of all matrices A_i in the batch. |
| [in,out] | A | array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n. On entry, the n-by-n matrices A_i to be factored. On exit, the factors L_i and U_i from the factorizations. The unit diagonal elements of L_i are not stored. |
| [in] | lda | int. lda >= n. Specifies the leading dimension of matrices A_i. |
| [out] | ipiv | pointer to int. Array on the GPU. Contains the vectors of pivot indices ipiv_i (corresponding to A_i). Dimension of ipiv_i is n. Elements of ipiv_i are 1-based indices. For each instance A_i in the batch and for 1 <= j <= n, the row j of the matrix A_i was interchanged with row ipiv_i[j]. Matrix P_i of the factorization can be derived from ipiv_i. The factorization here can be done without pivoting if ipiv is passed in as a nullptr. |
| [out] | info | pointer to int. Array of batchCount integers on the GPU. If info[i] = 0, successful exit for factorization of A_i. If info[i] = j > 0, U_i is singular. U_i[j,j] is the first zero pivot. |
| [in] | batchCount | int. batchCount >= 0. Number of matrices in the batch. |
| hipblasStatus_t hipblasDgetrfStridedBatched | ( | hipblasHandle_t | handle, |
| const int | n, | ||
| double * | A, | ||
| const int | lda, | ||
| const hipblasStride | strideA, | ||
| int * | ipiv, | ||
| const hipblasStride | strideP, | ||
| int * | info, | ||
| const int | batchCount | ||
| ) |
SOLVER API.
getrfStridedBatched computes the LU factorization of a batch of general n-by-n matrices using partial pivoting with row interchanges. The LU factorization can be done without pivoting if ipiv is passed as a nullptr.
In the case that ipiv is not null, the factorization of matrix \(A_i\) in the batch has the form:
\[ A_i = P_iL_iU_i \]
where \(P_i\) is a permutation matrix, \(L_i\) is lower triangular with unit diagonal elements, and \(U_i\) is upper triangular.
In the case that ipiv is null, the factorization is done without pivoting:
\[ A_i = L_iU_i \]
| [in] | handle | hipblasHandle_t. |
| [in] | n | int. n >= 0. The number of columns and rows of all matrices A_i in the batch. |
| [in,out] | A | pointer to type. Array on the GPU (the size depends on the value of strideA). On entry, the n-by-n matrices A_i to be factored. On exit, the factors L_i and U_i from the factorization. The unit diagonal elements of L_i are not stored. |
| [in] | lda | int. lda >= n. Specifies the leading dimension of matrices A_i. |
| [in] | strideA | hipblasStride. Stride from the start of one matrix A_i to the next one A_(i+1). There is no restriction for the value of strideA. Normal use case is strideA >= lda*n |
| [out] | ipiv | pointer to int. Array on the GPU (the size depends on the value of strideP). Contains the vectors of pivots indices ipiv_i (corresponding to A_i). Dimension of ipiv_i is n. Elements of ipiv_i are 1-based indices. For each instance A_i in the batch and for 1 <= j <= n, the row j of the matrix A_i was interchanged with row ipiv_i[j]. Matrix P_i of the factorization can be derived from ipiv_i. The factorization here can be done without pivoting if ipiv is passed in as a nullptr. |
| [in] | strideP | hipblasStride. Stride from the start of one vector ipiv_i to the next one ipiv_(i+1). There is no restriction for the value of strideP. Normal use case is strideP >= n. |
| [out] | info | pointer to int. Array of batchCount integers on the GPU. If info[i] = 0, successful exit for factorization of A_i. If info[i] = j > 0, U_i is singular. U_i[j,j] is the first zero pivot. |
| [in] | batchCount | int. batchCount >= 0. Number of matrices in the batch. |
| hipblasStatus_t hipblasDgetriBatched | ( | hipblasHandle_t | handle, |
| const int | n, | ||
| double *const | A[], | ||
| const int | lda, | ||
| int * | ipiv, | ||
| double *const | C[], | ||
| const int | ldc, | ||
| int * | info, | ||
| const int | batchCount | ||
| ) |
SOLVER API.
getriBatched computes the inverse \(C_i = A_i^{-1}\) of a batch of general n-by-n matrices \(A_i\).
The inverse is computed by solving the linear system
\[ A_i C_i = I \]
where I is the identity matrix, and \(A_i\) is factorized as \(A_i = P_i L_i U_i\) as given by getrfBatched.
| [in] | handle | hipblasHandle_t. |
| [in] | n | int. n >= 0. The number of rows and columns of all matrices A_i in the batch. |
| [in] | A | array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n. The factors L_i and U_i of the factorization A_i = P_i*L_i*U_i returned by getrfBatched. |
| [in] | lda | int. lda >= n. Specifies the leading dimension of matrices A_i. |
| [in] | ipiv | pointer to int. Array on the GPU (the size depends on the value of strideP). The pivot indices returned by getrfBatched. ipiv can be passed in as a nullptr, this will assume that getrfBatched was called without partial pivoting. |
| [out] | C | array of pointers to type. Each pointer points to an array on the GPU of dimension ldc*n. If info[i] = 0, the inverse of matrices A_i. Otherwise, undefined. |
| [in] | ldc | int. ldc >= n. Specifies the leading dimension of C_i. |
| [out] | info | pointer to int. Array of batchCount integers on the GPU. If info[i] = 0, successful exit for inversion of A_i. If info[i] = j > 0, U_i is singular. U_i[j,j] is the first zero pivot. |
| [in] | batchCount | int. batchCount >= 0. Number of matrices in the batch. |
| hipblasStatus_t hipblasDgetrs | ( | hipblasHandle_t | handle, |
| const hipblasOperation_t | trans, | ||
| const int | n, | ||
| const int | nrhs, | ||
| double * | A, | ||
| const int | lda, | ||
| const int * | ipiv, | ||
| double * | B, | ||
| const int | ldb, | ||
| int * | info | ||
| ) |
SOLVER API.
getrs solves a system of n linear equations on n variables in its factorized form.
It solves one of the following systems, depending on the value of trans:
\[ \begin{array}{cl} A X = B & \: \text{not transposed,}\\ A^T X = B & \: \text{transposed, or}\\ A^H X = B & \: \text{conjugate transposed.} \end{array} \]
Matrix A is defined by its triangular factors as returned by getrf.
| [in] | handle | hipblasHandle_t. |
| [in] | trans | hipblasOperation_t. Specifies the form of the system of equations. |
| [in] | n | int. n >= 0. The order of the system, i.e. the number of columns and rows of A. |
| [in] | nrhs | int. nrhs >= 0. The number of right hand sides, i.e., the number of columns of the matrix B. |
| [in] | A | pointer to type. Array on the GPU of dimension lda*n. The factors L and U of the factorization A = P*L*U returned by getrf. |
| [in] | lda | int. lda >= n. The leading dimension of A. |
| [in] | ipiv | pointer to int. Array on the GPU of dimension n. The pivot indices returned by getrf. |
| [in,out] | B | pointer to type. Array on the GPU of dimension ldb*nrhs. On entry, the right hand side matrix B. On exit, the solution matrix X. |
| [in] | ldb | int. ldb >= n. The leading dimension of B. |
| [out] | info | pointer to a int on the host. If info = 0, successful exit. If info = j < 0, the argument at position -j is invalid. |
| hipblasStatus_t hipblasDgetrsBatched | ( | hipblasHandle_t | handle, |
| const hipblasOperation_t | trans, | ||
| const int | n, | ||
| const int | nrhs, | ||
| double *const | A[], | ||
| const int | lda, | ||
| const int * | ipiv, | ||
| double *const | B[], | ||
| const int | ldb, | ||
| int * | info, | ||
| const int | batchCount | ||
| ) |
SOLVER API.
getrsBatched solves a batch of systems of n linear equations on n variables in its factorized forms.
For each instance i in the batch, it solves one of the following systems, depending on the value of trans:
\[ \begin{array}{cl} A_i X_i = B_i & \: \text{not transposed,}\\ A_i^T X_i = B_i & \: \text{transposed, or}\\ A_i^H X_i = B_i & \: \text{conjugate transposed.} \end{array} \]
Matrix \(A_i\) is defined by its triangular factors as returned by getrfBatched.
| [in] | handle | hipblasHandle_t. |
| [in] | trans | hipblasOperation_t. Specifies the form of the system of equations of each instance in the batch. |
| [in] | n | int. n >= 0. The order of the system, i.e. the number of columns and rows of all A_i matrices. |
| [in] | nrhs | int. nrhs >= 0. The number of right hand sides, i.e., the number of columns of all the matrices B_i. |
| [in] | A | Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n. The factors L_i and U_i of the factorization A_i = P_i*L_i*U_i returned by getrfBatched. |
| [in] | lda | int. lda >= n. The leading dimension of matrices A_i. |
| [in] | ipiv | pointer to int. Array on the GPU. Contains the vectors ipiv_i of pivot indices returned by getrfBatched. |
| [in,out] | B | Array of pointers to type. Each pointer points to an array on the GPU of dimension ldb*nrhs. On entry, the right hand side matrices B_i. On exit, the solution matrix X_i of each system in the batch. |
| [in] | ldb | int. ldb >= n. The leading dimension of matrices B_i. |
| [out] | info | pointer to a int on the host. If info = 0, successful exit. If info = j < 0, the argument at position -j is invalid. |
| [in] | batchCount | int. batchCount >= 0. Number of instances (systems) in the batch. |
| hipblasStatus_t hipblasDgetrsStridedBatched | ( | hipblasHandle_t | handle, |
| const hipblasOperation_t | trans, | ||
| const int | n, | ||
| const int | nrhs, | ||
| double * | A, | ||
| const int | lda, | ||
| const hipblasStride | strideA, | ||
| const int * | ipiv, | ||
| const hipblasStride | strideP, | ||
| double * | B, | ||
| const int | ldb, | ||
| const hipblasStride | strideB, | ||
| int * | info, | ||
| const int | batchCount | ||
| ) |
SOLVER API.
getrsStridedBatched solves a batch of systems of n linear equations on n variables in its factorized forms.
For each instance i in the batch, it solves one of the following systems, depending on the value of trans:
\[ \begin{array}{cl} A_i X_i = B_i & \: \text{not transposed,}\\ A_i^T X_i = B_i & \: \text{transposed, or}\\ A_i^H X_i = B_i & \: \text{conjugate transposed.} \end{array} \]
Matrix \(A_i\) is defined by its triangular factors as returned by getrfStridedBatched.
| [in] | handle | hipblasHandle_t. |
| [in] | trans | hipblasOperation_t. Specifies the form of the system of equations of each instance in the batch. |
| [in] | n | int. n >= 0. The order of the system, i.e. the number of columns and rows of all A_i matrices. |
| [in] | nrhs | int. nrhs >= 0. The number of right hand sides, i.e., the number of columns of all the matrices B_i. |
| [in] | A | pointer to type. Array on the GPU (the size depends on the value of strideA). The factors L_i and U_i of the factorization A_i = P_i*L_i*U_i returned by getrfStridedBatched. |
| [in] | lda | int. lda >= n. The leading dimension of matrices A_i. |
| [in] | strideA | hipblasStride. Stride from the start of one matrix A_i to the next one A_(i+1). There is no restriction for the value of strideA. Normal use case is strideA >= lda*n. |
| [in] | ipiv | pointer to int. Array on the GPU (the size depends on the value of strideP). Contains the vectors ipiv_i of pivot indices returned by getrfStridedBatched. |
| [in] | strideP | hipblasStride. Stride from the start of one vector ipiv_i to the next one ipiv_(i+1). There is no restriction for the value of strideP. Normal use case is strideP >= n. |
| [in,out] | B | pointer to type. Array on the GPU (size depends on the value of strideB). On entry, the right hand side matrices B_i. On exit, the solution matrix X_i of each system in the batch. |
| [in] | ldb | int. ldb >= n. The leading dimension of matrices B_i. |
| [in] | strideB | hipblasStride. Stride from the start of one matrix B_i to the next one B_(i+1). There is no restriction for the value of strideB. Normal use case is strideB >= ldb*nrhs. |
| [out] | info | pointer to a int on the host. If info = 0, successful exit. If info = j < 0, the argument at position -j is invalid. |
| [in] | batchCount | int. batchCount >= 0. Number of instances (systems) in the batch. |
| hipblasStatus_t hipblasDnrm2 | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const double * | x, | ||
| int | incx, | ||
| double * | result | ||
| ) |
BLAS Level 1 API.
nrm2 computes the euclidean norm of a real or complex vector
result := sqrt( x'*x ) for real vectors
result := sqrt( x**H*x ) for complex vectors
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of y. |
| [in,out] | result | device pointer or host pointer to store the nrm2 product. return is 0.0 if n, incx<=0. |
| hipblasStatus_t hipblasDnrm2Batched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const double *const | x[], | ||
| int | incx, | ||
| int | batchCount, | ||
| double * | result | ||
| ) |
BLAS Level 1 API.
nrm2Batched computes the euclidean norm over a batch of real or complex vectors
result := sqrt( x_i'*x_i ) for real vectors x, for i = 1, ..., batchCount
result := sqrt( x_i**H*x_i ) for complex vectors x, for i = 1, ..., batchCount
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each x_i. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. incx must be > 0. |
| [in] | batchCount | [int] number of instances in the batch |
| [out] | result | device pointer or host pointer to array of batchCount size for nrm2 results. return is 0.0 for each element if n <= 0, incx<=0. |
| hipblasStatus_t hipblasDnrm2StridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const double * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount, | ||
| double * | result | ||
| ) |
BLAS Level 1 API.
nrm2StridedBatched computes the euclidean norm over a batch of real or complex vectors
:= sqrt( x_i'*x_i ) for real vectors x, for i = 1, ..., batchCount
:= sqrt( x_i**H*x_i ) for complex vectors, for i = 1, ..., batchCount
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each x_i. |
| [in] | x | device pointer to the first vector x_1. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. incx must be > 0. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stride_x, however the user should take care to ensure that stride_x is of appropriate size, for a typical case this means stride_x >= n * incx. |
| [in] | batchCount | [int] number of instances in the batch |
| [out] | result | device pointer or host pointer to array for storing contiguous batchCount results. return is 0.0 for each element if n <= 0, incx<=0. |
| hipblasStatus_t hipblasDotBatchedEx | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const void * | x, | ||
| hipblasDatatype_t | xType, | ||
| int | incx, | ||
| const void * | y, | ||
| hipblasDatatype_t | yType, | ||
| int | incy, | ||
| int | batchCount, | ||
| void * | result, | ||
| hipblasDatatype_t | resultType, | ||
| hipblasDatatype_t | executionType | ||
| ) |
BLAS EX API.
dotBatchedEx performs a batch of dot products of vectors x and y
result_i = x_i * y_i;
dotcBatchedEx performs a batch of dot products of the conjugate of complex vector x and complex vector y
result_i = conjugate (x_i) * y_i;
where (x_i, y_i) is the i-th instance of the batch. x_i and y_i are vectors, for i = 1, ..., batchCount
- Supported types are determined by the backend. See rocBLAS/cuBLAS documentation.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i and y_i. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | xType | [hipblasDatatype_t] specifies the datatype of each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | y | device array of device pointers storing each vector y_i. |
| [in] | yType | [hipblasDatatype_t] specifies the datatype of each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | batchCount | [int] number of instances in the batch |
| [in,out] | result | device array or host array of batchCount size to store the dot products of each batch. return 0.0 for each element if n <= 0. |
| [in] | resultType | [hipblasDatatype_t] specifies the datatype of the result. |
| [in] | executionType | [hipblasDatatype_t] specifies the datatype of computation. |
| hipblasStatus_t hipblasDotcBatchedEx | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const void * | x, | ||
| hipblasDatatype_t | xType, | ||
| int | incx, | ||
| const void * | y, | ||
| hipblasDatatype_t | yType, | ||
| int | incy, | ||
| int | batchCount, | ||
| void * | result, | ||
| hipblasDatatype_t | resultType, | ||
| hipblasDatatype_t | executionType | ||
| ) |
BLAS EX API.
dotBatchedEx performs a batch of dot products of vectors x and y
result_i = x_i * y_i;
dotcBatchedEx performs a batch of dot products of the conjugate of complex vector x and complex vector y
result_i = conjugate (x_i) * y_i;
where (x_i, y_i) is the i-th instance of the batch. x_i and y_i are vectors, for i = 1, ..., batchCount
- Supported types are determined by the backend. See rocBLAS/cuBLAS documentation.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i and y_i. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | xType | [hipblasDatatype_t] specifies the datatype of each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | y | device array of device pointers storing each vector y_i. |
| [in] | yType | [hipblasDatatype_t] specifies the datatype of each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | batchCount | [int] number of instances in the batch |
| [in,out] | result | device array or host array of batchCount size to store the dot products of each batch. return 0.0 for each element if n <= 0. |
| [in] | resultType | [hipblasDatatype_t] specifies the datatype of the result. |
| [in] | executionType | [hipblasDatatype_t] specifies the datatype of computation. |
| hipblasStatus_t hipblasDotcEx | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const void * | x, | ||
| hipblasDatatype_t | xType, | ||
| int | incx, | ||
| const void * | y, | ||
| hipblasDatatype_t | yType, | ||
| int | incy, | ||
| void * | result, | ||
| hipblasDatatype_t | resultType, | ||
| hipblasDatatype_t | executionType | ||
| ) |
BLAS EX API.
dotEx performs the dot product of vectors x and y
result = x * y;
dotcEx performs the dot product of the conjugate of complex vector x and complex vector y
result = conjugate (x) * y; - Supported types are determined by the backend. See rocBLAS/cuBLAS documentation.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x and y. |
| [in] | x | device pointer storing vector x. |
| [in] | xType | [hipblasDatatype_t] specifies the datatype of vector x. |
| [in] | incx | [int] specifies the increment for the elements of y. |
| [in] | y | device pointer storing vector y. |
| [in] | yType | [hipblasDatatype_t] specifies the datatype of vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in,out] | result | device pointer or host pointer to store the dot product. return is 0.0 if n <= 0. |
| [in] | resultType | [hipblasDatatype_t] specifies the datatype of the result. |
| [in] | executionType | [hipblasDatatype_t] specifies the datatype of computation. |
| hipblasStatus_t hipblasDotcStridedBatchedEx | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const void * | x, | ||
| hipblasDatatype_t | xType, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const void * | y, | ||
| hipblasDatatype_t | yType, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount, | ||
| void * | result, | ||
| hipblasDatatype_t | resultType, | ||
| hipblasDatatype_t | executionType | ||
| ) |
BLAS EX API.
dotStridedBatchedEx performs a batch of dot products of vectors x and y
result_i = x_i * y_i;
dotc_strided_batched_ex performs a batch of dot products of the conjugate of complex vector x and complex vector y
result_i = conjugate (x_i) * y_i;
where (x_i, y_i) is the i-th instance of the batch. x_i and y_i are vectors, for i = 1, ..., batchCount
- Supported types are determined by the backend. See rocBLAS/cuBLAS documentation.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i and y_i. |
| [in] | x | device pointer to the first vector (x_1) in the batch. |
| [in] | xType | [hipblasDatatype_t] specifies the datatype of each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1) |
| [in] | y | device pointer to the first vector (y_1) in the batch. |
| [in] | yType | [hipblasDatatype_t] specifies the datatype of each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1) |
| [in] | batchCount | [int] number of instances in the batch |
| [in,out] | result | device array or host array of batchCount size to store the dot products of each batch. return 0.0 for each element if n <= 0. |
| [in] | resultType | [hipblasDatatype_t] specifies the datatype of the result. |
| [in] | executionType | [hipblasDatatype_t] specifies the datatype of computation. |
| hipblasStatus_t hipblasDotEx | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const void * | x, | ||
| hipblasDatatype_t | xType, | ||
| int | incx, | ||
| const void * | y, | ||
| hipblasDatatype_t | yType, | ||
| int | incy, | ||
| void * | result, | ||
| hipblasDatatype_t | resultType, | ||
| hipblasDatatype_t | executionType | ||
| ) |
BLAS EX API.
dotEx performs the dot product of vectors x and y
result = x * y;
dotcEx performs the dot product of the conjugate of complex vector x and complex vector y
result = conjugate (x) * y; - Supported types are determined by the backend. See rocBLAS/cuBLAS documentation.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x and y. |
| [in] | x | device pointer storing vector x. |
| [in] | xType | [hipblasDatatype_t] specifies the datatype of vector x. |
| [in] | incx | [int] specifies the increment for the elements of y. |
| [in] | y | device pointer storing vector y. |
| [in] | yType | [hipblasDatatype_t] specifies the datatype of vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in,out] | result | device pointer or host pointer to store the dot product. return is 0.0 if n <= 0. |
| [in] | resultType | [hipblasDatatype_t] specifies the datatype of the result. |
| [in] | executionType | [hipblasDatatype_t] specifies the datatype of computation. |
| hipblasStatus_t hipblasDotStridedBatchedEx | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const void * | x, | ||
| hipblasDatatype_t | xType, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const void * | y, | ||
| hipblasDatatype_t | yType, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount, | ||
| void * | result, | ||
| hipblasDatatype_t | resultType, | ||
| hipblasDatatype_t | executionType | ||
| ) |
BLAS EX API.
dotStridedBatchedEx performs a batch of dot products of vectors x and y
result_i = x_i * y_i;
dotc_strided_batched_ex performs a batch of dot products of the conjugate of complex vector x and complex vector y
result_i = conjugate (x_i) * y_i;
where (x_i, y_i) is the i-th instance of the batch. x_i and y_i are vectors, for i = 1, ..., batchCount
- Supported types are determined by the backend. See rocBLAS/cuBLAS documentation.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i and y_i. |
| [in] | x | device pointer to the first vector (x_1) in the batch. |
| [in] | xType | [hipblasDatatype_t] specifies the datatype of each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1) |
| [in] | y | device pointer to the first vector (y_1) in the batch. |
| [in] | yType | [hipblasDatatype_t] specifies the datatype of each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1) |
| [in] | batchCount | [int] number of instances in the batch |
| [in,out] | result | device array or host array of batchCount size to store the dot products of each batch. return 0.0 for each element if n <= 0. |
| [in] | resultType | [hipblasDatatype_t] specifies the datatype of the result. |
| [in] | executionType | [hipblasDatatype_t] specifies the datatype of computation. |
| hipblasStatus_t hipblasDrot | ( | hipblasHandle_t | handle, |
| int | n, | ||
| double * | x, | ||
| int | incx, | ||
| double * | y, | ||
| int | incy, | ||
| const double * | c, | ||
| const double * | s | ||
| ) |
BLAS Level 1 API.
rot applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to vectors x and y. Scalars c and s may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in the x and y vectors. |
| [in,out] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment between elements of x. |
| [in,out] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment between elements of y. |
| [in] | c | device pointer or host pointer storing scalar cosine component of the rotation matrix. |
| [in] | s | device pointer or host pointer storing scalar sine component of the rotation matrix. |
| hipblasStatus_t hipblasDrotBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| double *const | x[], | ||
| int | incx, | ||
| double *const | y[], | ||
| int | incy, | ||
| const double * | c, | ||
| const double * | s, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
rotBatched applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to batched vectors x_i and y_i, for i = 1, ..., batchCount. Scalars c and s may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each x_i and y_i vectors. |
| [in,out] | x | device array of deivce pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment between elements of each x_i. |
| [in,out] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment between elements of each y_i. |
| [in] | c | device pointer or host pointer to scalar cosine component of the rotation matrix. |
| [in] | s | device pointer or host pointer to scalar sine component of the rotation matrix. |
| [in] | batchCount | [int] the number of x and y arrays, i.e. the number of batches. |
| hipblasStatus_t hipblasDrotg | ( | hipblasHandle_t | handle, |
| double * | a, | ||
| double * | b, | ||
| double * | c, | ||
| double * | s | ||
| ) |
BLAS Level 1 API.
rotg creates the Givens rotation matrix for the vector (a b). Scalars c and s and arrays a and b may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode. If the pointer mode is set to HIPBLAS_POINTER_MODE_HOST, this function blocks the CPU until the GPU has finished and the results are available in host memory. If the pointer mode is set to HIPBLAS_POINTER_MODE_DEVICE, this function returns immediately and synchronization is required to read the results.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in,out] | a | device pointer or host pointer to input vector element, overwritten with r. |
| [in,out] | b | device pointer or host pointer to input vector element, overwritten with z. |
| [in,out] | c | device pointer or host pointer to cosine element of Givens rotation. |
| [in,out] | s | device pointer or host pointer sine element of Givens rotation. |
| hipblasStatus_t hipblasDrotgBatched | ( | hipblasHandle_t | handle, |
| double *const | a[], | ||
| double *const | b[], | ||
| double *const | c[], | ||
| double *const | s[], | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
rotgBatched creates the Givens rotation matrix for the batched vectors (a_i b_i), for i = 1, ..., batchCount. a, b, c, and s may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode. If the pointer mode is set to HIPBLAS_POINTER_MODE_HOST, this function blocks the CPU until the GPU has finished and the results are available in host memory. If the pointer mode is set to HIPBLAS_POINTER_MODE_DEVICE, this function returns immediately and synchronization is required to read the results.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in,out] | a | device array of device pointers storing each single input vector element a_i, overwritten with r_i. |
| [in,out] | b | device array of device pointers storing each single input vector element b_i, overwritten with z_i. |
| [in,out] | c | device array of device pointers storing each cosine element of Givens rotation for the batch. |
| [in,out] | s | device array of device pointers storing each sine element of Givens rotation for the batch. |
| [in] | batchCount | [int] number of batches (length of arrays a, b, c, and s). |
| hipblasStatus_t hipblasDrotgStridedBatched | ( | hipblasHandle_t | handle, |
| double * | a, | ||
| hipblasStride | stridea, | ||
| double * | b, | ||
| hipblasStride | strideb, | ||
| double * | c, | ||
| hipblasStride | stridec, | ||
| double * | s, | ||
| hipblasStride | strides, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
rotgStridedBatched creates the Givens rotation matrix for the strided batched vectors (a_i b_i), for i = 1, ..., batchCount. a, b, c, and s may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode. If the pointer mode is set to HIPBLAS_POINTER_MODE_HOST, this function blocks the CPU until the GPU has finished and the results are available in host memory. If the pointer mode is set to HIPBLAS_POINTER_MODE_HOST, this function returns immediately and synchronization is required to read the results.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in,out] | a | device strided_batched pointer or host strided_batched pointer to first single input vector element a_1, overwritten with r. |
| [in] | stridea | [hipblasStride] distance between elements of a in batch (distance between a_i and a_(i + 1)) |
| [in,out] | b | device strided_batched pointer or host strided_batched pointer to first single input vector element b_1, overwritten with z. |
| [in] | strideb | [hipblasStride] distance between elements of b in batch (distance between b_i and b_(i + 1)) |
| [in,out] | c | device strided_batched pointer or host strided_batched pointer to first cosine element of Givens rotations c_1. |
| [in] | stridec | [hipblasStride] distance between elements of c in batch (distance between c_i and c_(i + 1)) |
| [in,out] | s | device strided_batched pointer or host strided_batched pointer to sine element of Givens rotations s_1. |
| [in] | strides | [hipblasStride] distance between elements of s in batch (distance between s_i and s_(i + 1)) |
| [in] | batchCount | [int] number of batches (length of arrays a, b, c, and s). |
| hipblasStatus_t hipblasDrotm | ( | hipblasHandle_t | handle, |
| int | n, | ||
| double * | x, | ||
| int | incx, | ||
| double * | y, | ||
| int | incy, | ||
| const double * | param | ||
| ) |
BLAS Level 1 API.
rotm applies the modified Givens rotation matrix defined by param to vectors x and y.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in the x and y vectors. |
| [in,out] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment between elements of x. |
| [in,out] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment between elements of y. |
| [in] | param | device vector or host vector of 5 elements defining the rotation. param[0] = flag param[1] = H11 param[2] = H21 param[3] = H12 param[4] = H22 The flag parameter defines the form of H: flag = -1 => H = ( H11 H12 H21 H22 ) flag = 0 => H = ( 1.0 H12 H21 1.0 ) flag = 1 => H = ( H11 1.0 -1.0 H22 ) flag = -2 => H = ( 1.0 0.0 0.0 1.0 ) param may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode. |
| hipblasStatus_t hipblasDrotmBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| double *const | x[], | ||
| int | incx, | ||
| double *const | y[], | ||
| int | incy, | ||
| const double *const | param[], | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
rotmBatched applies the modified Givens rotation matrix defined by param_i to batched vectors x_i and y_i, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in the x and y vectors. |
| [in,out] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment between elements of each x_i. |
| [in,out] | y | device array of device pointers storing each vector y_1. |
| [in] | incy | [int] specifies the increment between elements of each y_i. |
| [in] | param | device array of device vectors of 5 elements defining the rotation. param[0] = flag param[1] = H11 param[2] = H21 param[3] = H12 param[4] = H22 The flag parameter defines the form of H: flag = -1 => H = ( H11 H12 H21 H22 ) flag = 0 => H = ( 1.0 H12 H21 1.0 ) flag = 1 => H = ( H11 1.0 -1.0 H22 ) flag = -2 => H = ( 1.0 0.0 0.0 1.0 ) param may ONLY be stored on the device for the batched version of this function. |
| [in] | batchCount | [int] the number of x and y arrays, i.e. the number of batches. |
| hipblasStatus_t hipblasDrotmg | ( | hipblasHandle_t | handle, |
| double * | d1, | ||
| double * | d2, | ||
| double * | x1, | ||
| const double * | y1, | ||
| double * | param | ||
| ) |
BLAS Level 1 API.
rotmg creates the modified Givens rotation matrix for the vector (d1 * x1, d2 * y1). Parameters may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode. If the pointer mode is set to HIPBLAS_POINTER_MODE_HOST, this function blocks the CPU until the GPU has finished and the results are available in host memory. If the pointer mode is set to HIPBLAS_POINTER_MODE_DEVICE, this function returns immediately and synchronization is required to read the results.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in,out] | d1 | device pointer or host pointer to input scalar that is overwritten. |
| [in,out] | d2 | device pointer or host pointer to input scalar that is overwritten. |
| [in,out] | x1 | device pointer or host pointer to input scalar that is overwritten. |
| [in] | y1 | device pointer or host pointer to input scalar. |
| [out] | param | device vector or host vector of 5 elements defining the rotation. param[0] = flag param[1] = H11 param[2] = H21 param[3] = H12 param[4] = H22 The flag parameter defines the form of H: flag = -1 => H = ( H11 H12 H21 H22 ) flag = 0 => H = ( 1.0 H12 H21 1.0 ) flag = 1 => H = ( H11 1.0 -1.0 H22 ) flag = -2 => H = ( 1.0 0.0 0.0 1.0 ) param may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode. |
| hipblasStatus_t hipblasDrotmgBatched | ( | hipblasHandle_t | handle, |
| double *const | d1[], | ||
| double *const | d2[], | ||
| double *const | x1[], | ||
| const double *const | y1[], | ||
| double *const | param[], | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
rotmgBatched creates the modified Givens rotation matrix for the batched vectors (d1_i * x1_i, d2_i * y1_i), for i = 1, ..., batchCount. Parameters may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode. If the pointer mode is set to HIPBLAS_POINTER_MODE_HOST, this function blocks the CPU until the GPU has finished and the results are available in host memory. If the pointer mode is set to HIPBLAS_POINTER_MODE_DEVICE, this function returns immediately and synchronization is required to read the results.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in,out] | d1 | device batched array or host batched array of input scalars that is overwritten. |
| [in,out] | d2 | device batched array or host batched array of input scalars that is overwritten. |
| [in,out] | x1 | device batched array or host batched array of input scalars that is overwritten. |
| [in] | y1 | device batched array or host batched array of input scalars. |
| [out] | param | device batched array or host batched array of vectors of 5 elements defining the rotation. param[0] = flag param[1] = H11 param[2] = H21 param[3] = H12 param[4] = H22 The flag parameter defines the form of H: flag = -1 => H = ( H11 H12 H21 H22 ) flag = 0 => H = ( 1.0 H12 H21 1.0 ) flag = 1 => H = ( H11 1.0 -1.0 H22 ) flag = -2 => H = ( 1.0 0.0 0.0 1.0 ) param may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode. |
| [in] | batchCount | [int] the number of instances in the batch. |
| hipblasStatus_t hipblasDrotmgStridedBatched | ( | hipblasHandle_t | handle, |
| double * | d1, | ||
| hipblasStride | strided1, | ||
| double * | d2, | ||
| hipblasStride | strided2, | ||
| double * | x1, | ||
| hipblasStride | stridex1, | ||
| const double * | y1, | ||
| hipblasStride | stridey1, | ||
| double * | param, | ||
| hipblasStride | strideParam, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
rotmgStridedBatched creates the modified Givens rotation matrix for the strided batched vectors (d1_i * x1_i, d2_i * y1_i), for i = 1, ..., batchCount. Parameters may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode. If the pointer mode is set to HIPBLAS_POINTER_MODE_HOST, this function blocks the CPU until the GPU has finished and the results are available in host memory. If the pointer mode is set to HIPBLAS_POINTER_MODE_DEVICE, this function returns immediately and synchronization is required to read the results.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in,out] | d1 | device strided_batched array or host strided_batched array of input scalars that is overwritten. |
| [in] | strided1 | [hipblasStride] specifies the increment between the beginning of d1_i and d1_(i+1) |
| [in,out] | d2 | device strided_batched array or host strided_batched array of input scalars that is overwritten. |
| [in] | strided2 | [hipblasStride] specifies the increment between the beginning of d2_i and d2_(i+1) |
| [in,out] | x1 | device strided_batched array or host strided_batched array of input scalars that is overwritten. |
| [in] | stridex1 | [hipblasStride] specifies the increment between the beginning of x1_i and x1_(i+1) |
| [in] | y1 | device strided_batched array or host strided_batched array of input scalars. |
| [in] | stridey1 | [hipblasStride] specifies the increment between the beginning of y1_i and y1_(i+1) |
| [out] | param | device stridedBatched array or host stridedBatched array of vectors of 5 elements defining the rotation. param[0] = flag param[1] = H11 param[2] = H21 param[3] = H12 param[4] = H22 The flag parameter defines the form of H: flag = -1 => H = ( H11 H12 H21 H22 ) flag = 0 => H = ( 1.0 H12 H21 1.0 ) flag = 1 => H = ( H11 1.0 -1.0 H22 ) flag = -2 => H = ( 1.0 0.0 0.0 1.0 ) param may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode. |
| [in] | strideParam | [hipblasStride] specifies the increment between the beginning of param_i and param_(i + 1) |
| [in] | batchCount | [int] the number of instances in the batch. |
| hipblasStatus_t hipblasDrotmStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| double * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| double * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| const double * | param, | ||
| hipblasStride | strideParam, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
rotmStridedBatched applies the modified Givens rotation matrix defined by param_i to strided batched vectors x_i and y_i, for i = 1, ..., batchCount
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in the x and y vectors. |
| [in,out] | x | device pointer pointing to first strided batched vector x_1. |
| [in] | incx | [int] specifies the increment between elements of each x_i. |
| [in] | stridex | [hipblasStride] specifies the increment between the beginning of x_i and x_(i + 1) |
| [in,out] | y | device pointer pointing to first strided batched vector y_1. |
| [in] | incy | [int] specifies the increment between elements of each y_i. |
| [in] | stridey | [hipblasStride] specifies the increment between the beginning of y_i and y_(i + 1) |
| [in] | param | device pointer pointing to first array of 5 elements defining the rotation (param_1). param[0] = flag param[1] = H11 param[2] = H21 param[3] = H12 param[4] = H22 The flag parameter defines the form of H: flag = -1 => H = ( H11 H12 H21 H22 ) flag = 0 => H = ( 1.0 H12 H21 1.0 ) flag = 1 => H = ( H11 1.0 -1.0 H22 ) flag = -2 => H = ( 1.0 0.0 0.0 1.0 ) param may ONLY be stored on the device for the strided_batched version of this function. |
| [in] | strideParam | [hipblasStride] specifies the increment between the beginning of param_i and param_(i + 1) |
| [in] | batchCount | [int] the number of x and y arrays, i.e. the number of batches. |
| hipblasStatus_t hipblasDrotStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| double * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| double * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| const double * | c, | ||
| const double * | s, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
rotStridedBatched applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to strided batched vectors x_i and y_i, for i = 1, ..., batchCount. Scalars c and s may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each x_i and y_i vectors. |
| [in,out] | x | device pointer to the first vector x_1. |
| [in] | incx | [int] specifies the increment between elements of each x_i. |
| [in] | stridex | [hipblasStride] specifies the increment from the beginning of x_i to the beginning of x_(i+1) |
| [in,out] | y | device pointer to the first vector y_1. |
| [in] | incy | [int] specifies the increment between elements of each y_i. |
| [in] | stridey | [hipblasStride] specifies the increment from the beginning of y_i to the beginning of y_(i+1) |
| [in] | c | device pointer or host pointer to scalar cosine component of the rotation matrix. |
| [in] | s | device pointer or host pointer to scalar sine component of the rotation matrix. |
| [in] | batchCount | [int] the number of x and y arrays, i.e. the number of batches. |
| hipblasStatus_t hipblasDsbmv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| int | k, | ||
| const double * | alpha, | ||
| const double * | AP, | ||
| int | lda, | ||
| const double * | x, | ||
| int | incx, | ||
| const double * | beta, | ||
| double * | y, | ||
| int | incy | ||
| ) |
BLAS Level 2 API.
sbmv performs the matrix-vector operation:
y := alpha*A*x + beta*y,
where alpha and beta are scalars, x and y are n element vectors and A should contain an upper or lower triangular n by n symmetric banded matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] |
| [in] | k | [int] specifies the number of sub- and super-diagonals |
| [in] | alpha | specifies the scalar alpha |
| [in] | AP | pointer storing matrix A on the GPU |
| [in] | lda | [int] specifies the leading dimension of matrix A |
| [in] | x | pointer storing vector x on the GPU |
| [in] | incx | [int] specifies the increment for the elements of x |
| [in] | beta | specifies the scalar beta |
| [out] | y | pointer storing vector y on the GPU |
| [in] | incy | [int] specifies the increment for the elements of y |
| hipblasStatus_t hipblasDsbmvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| int | k, | ||
| const double * | alpha, | ||
| const double *const | AP[], | ||
| int | lda, | ||
| const double *const | x[], | ||
| int | incx, | ||
| const double * | beta, | ||
| double *const | y[], | ||
| int | incy, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
sbmvBatched performs the matrix-vector operation:
y_i := alpha*A_i*x_i + beta*y_i,
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha and beta are scalars, x_i and y_i are vectors and A_i is an n by n symmetric banded matrix, for i = 1, ..., batchCount. A should contain an upper or lower triangular n by n symmetric banded matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] number of rows and columns of each matrix A_i |
| [in] | k | [int] specifies the number of sub- and super-diagonals |
| [in] | alpha | device pointer or host pointer to scalar alpha |
| [in] | AP | device array of device pointers storing each matrix A_i |
| [in] | lda | [int] specifies the leading dimension of each matrix A_i |
| [in] | x | device array of device pointers storing each vector x_i |
| [in] | incx | [int] specifies the increment for the elements of each vector x_i |
| [in] | beta | device pointer or host pointer to scalar beta |
| [out] | y | device array of device pointers storing each vector y_i |
| [in] | incy | [int] specifies the increment for the elements of each vector y_i |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasDsbmvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| int | k, | ||
| const double * | alpha, | ||
| const double * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const double * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const double * | beta, | ||
| double * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
sbmvStridedBatched performs the matrix-vector operation:
y_i := alpha*A_i*x_i + beta*y_i,
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha and beta are scalars, x_i and y_i are vectors and A_i is an n by n symmetric banded matrix, for i = 1, ..., batchCount. A should contain an upper or lower triangular n by n symmetric banded matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] number of rows and columns of each matrix A_i |
| [in] | k | [int] specifies the number of sub- and super-diagonals |
| [in] | alpha | device pointer or host pointer to scalar alpha |
| [in] | AP | Device pointer to the first matrix A_1 on the GPU |
| [in] | lda | [int] specifies the leading dimension of each matrix A_i |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | x | Device pointer to the first vector x_1 on the GPU |
| [in] | incx | [int] specifies the increment for the elements of each vector x_i |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stridex, however the user should take care to ensure that stridex is of appropriate size. This typically means stridex >= n * incx. stridex should be non zero. |
| [in] | beta | device pointer or host pointer to scalar beta |
| [out] | y | Device pointer to the first vector y_1 on the GPU |
| [in] | incy | [int] specifies the increment for the elements of each vector y_i |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1). There are no restrictions placed on stridey, however the user should take care to ensure that stridey is of appropriate size. This typically means stridey >= n * incy. stridey should be non zero. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasDscal | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const double * | alpha, | ||
| double * | x, | ||
| int | incx | ||
| ) |
BLAS Level 1 API.
scal scales each element of vector x with scalar alpha.
x := alpha * x
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x. |
| [in] | alpha | device pointer or host pointer for the scalar alpha. |
| [in,out] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| hipblasStatus_t hipblasDscalBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const double * | alpha, | ||
| double *const | x[], | ||
| int | incx, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
scalBatched scales each element of vector x_i with scalar alpha, for i = 1, ... , batchCount.
x_i := alpha * x_i
where (x_i) is the i-th instance of the batch.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i. |
| [in] | alpha | host pointer or device pointer for the scalar alpha. |
| [in,out] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | batchCount | [int] specifies the number of batches in x. |
| hipblasStatus_t hipblasDscalStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const double * | alpha, | ||
| double * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
scalStridedBatched scales each element of vector x_i with scalar alpha, for i = 1, ... , batchCount.
x_i := alpha * x_i ,
where (x_i) is the i-th instance of the batch.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i. |
| [in] | alpha | host pointer or device pointer for the scalar alpha. |
| [in,out] | x | device pointer to the first vector (x_1) in the batch. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stride_x, however the user should take care to ensure that stride_x is of appropriate size, for a typical case this means stride_x >= n * incx. |
| [in] | batchCount | [int] specifies the number of batches in x. |
| hipblasStatus_t hipblasDspmv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double * | AP, | ||
| const double * | x, | ||
| int | incx, | ||
| const double * | beta, | ||
| double * | y, | ||
| int | incy | ||
| ) |
BLAS Level 2 API.
spmv performs the matrix-vector operation:
y := alpha*A*x + beta*y,
where alpha and beta are scalars, x and y are n element vectors and A should contain an upper or lower triangular n by n packed symmetric matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] |
| [in] | alpha | specifies the scalar alpha |
| [in] | AP | pointer storing matrix A on the GPU |
| [in] | x | pointer storing vector x on the GPU |
| [in] | incx | [int] specifies the increment for the elements of x |
| [in] | beta | specifies the scalar beta |
| [out] | y | pointer storing vector y on the GPU |
| [in] | incy | [int] specifies the increment for the elements of y |
| hipblasStatus_t hipblasDspmvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double *const | AP[], | ||
| const double *const | x[], | ||
| int | incx, | ||
| const double * | beta, | ||
| double *const | y[], | ||
| int | incy, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
spmvBatched performs the matrix-vector operation:
y_i := alpha*AP_i*x_i + beta*y_i,
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha and beta are scalars, x_i and y_i are vectors and A_i is an n by n symmetric matrix, for i = 1, ..., batchCount. A should contain an upper or lower triangular n by n packed symmetric matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] number of rows and columns of each matrix A_i |
| [in] | alpha | device pointer or host pointer to scalar alpha |
| [in] | AP | device array of device pointers storing each matrix A_i |
| [in] | x | device array of device pointers storing each vector x_i |
| [in] | incx | [int] specifies the increment for the elements of each vector x_i |
| [in] | beta | device pointer or host pointer to scalar beta |
| [out] | y | device array of device pointers storing each vector y_i |
| [in] | incy | [int] specifies the increment for the elements of each vector y_i |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasDspmvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double * | AP, | ||
| hipblasStride | strideA, | ||
| const double * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const double * | beta, | ||
| double * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
spmvStridedBatched performs the matrix-vector operation:
y_i := alpha*A_i*x_i + beta*y_i,
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha and beta are scalars, x_i and y_i are vectors and A_i is an n by n symmetric matrix, for i = 1, ..., batchCount. A should contain an upper or lower triangular n by n packed symmetric matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] number of rows and columns of each matrix A_i |
| [in] | alpha | device pointer or host pointer to scalar alpha |
| [in] | AP | Device pointer to the first matrix A_1 on the GPU |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | x | Device pointer to the first vector x_1 on the GPU |
| [in] | incx | [int] specifies the increment for the elements of each vector x_i |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stridex, however the user should take care to ensure that stridex is of appropriate size. This typically means stridex >= n * incx. stridex should be non zero. |
| [in] | beta | device pointer or host pointer to scalar beta |
| [out] | y | Device pointer to the first vector y_1 on the GPU |
| [in] | incy | [int] specifies the increment for the elements of each vector y_i |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1). There are no restrictions placed on stridey, however the user should take care to ensure that stridey is of appropriate size. This typically means stridey >= n * incy. stridey should be non zero. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasDspr | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double * | x, | ||
| int | incx, | ||
| double * | AP | ||
| ) |
BLAS Level 2 API.
spr performs the matrix-vector operations
A := A + alpha*x*x**T
where alpha is a scalar, x is a vector, and A is an n by n symmetric matrix, supplied in packed form.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of A is supplied in AP. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of A is supplied in AP. |
| [in] | n | [int] the number of rows and columns of matrix A, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in,out] | AP | device pointer storing the packed version of the specified triangular portion of the symmetric matrix A. Of at least size ((n * (n + 1)) / 2). if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of the symmetric matrix A is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(0,1) AP(2) = A(1,1), etc. Ex: (HIPBLAS_FILL_MODE_UPPER; n = 4) 1 2 4 7 2 3 5 8 --—> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] 4 5 6 9 7 8 9 0 if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of the symmetric matrix A is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(1,0) AP(2) = A(2,1), etc. Ex: (HIPBLAS_FILL_MODE_LOWER; n = 4) 1 2 3 4 2 5 6 7 --—> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] 3 6 8 9 4 7 9 0 |
| hipblasStatus_t hipblasDspr2 | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double * | x, | ||
| int | incx, | ||
| const double * | y, | ||
| int | incy, | ||
| double * | AP | ||
| ) |
BLAS Level 2 API.
spr2 performs the matrix-vector operation
A := A + alpha*x*y**T + alpha*y*x**T
where alpha is a scalar, x and y are vectors, and A is an n by n symmetric matrix, supplied in packed form.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of A is supplied in AP. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of A is supplied in AP. |
| [in] | n | [int] the number of rows and columns of matrix A, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in,out] | AP | device pointer storing the packed version of the specified triangular portion of the symmetric matrix A. Of at least size ((n * (n + 1)) / 2). if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of the symmetric matrix A is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(0,1) AP(2) = A(1,1), etc. Ex: (HIPBLAS_FILL_MODE_UPPER; n = 4) 1 2 4 7 2 3 5 8 --—> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] 4 5 6 9 7 8 9 0 if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of the symmetric matrix A is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(1,0) AP(n) = A(2,1), etc. Ex: (HIPBLAS_FILL_MODE_LOWER; n = 4) 1 2 3 4 2 5 6 7 --—> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] 3 6 8 9 4 7 9 0 |
| hipblasStatus_t hipblasDspr2Batched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double *const | x[], | ||
| int | incx, | ||
| const double *const | y[], | ||
| int | incy, | ||
| double *const | AP[], | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
spr2Batched performs the matrix-vector operation
A_i := A_i + alpha*x_i*y_i**T + alpha*y_i*x_i**T
where alpha is a scalar, x_i and y_i are vectors, and A_i is an n by n symmetric matrix, supplied in packed form, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied in AP. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied in AP. |
| [in] | n | [int] the number of rows and columns of each matrix A_i, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in,out] | AP | device array of device pointers storing the packed version of the specified triangular portion of each symmetric matrix A_i of at least size ((n * (n + 1)) / 2). Array is of at least size batchCount. if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of each symmetric matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(0,1) AP(2) = A(1,1), etc. Ex: (HIPBLAS_FILL_MODE_UPPER; n = 4) 1 2 4 7 2 3 5 8 --—> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] 4 5 6 9 7 8 9 0 if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of each symmetric matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(1,0) AP(n) = A(2,1), etc. Ex: (HIPBLAS_FILL_MODE_LOWER; n = 4) 1 2 3 4 2 5 6 7 --—> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] 3 6 8 9 4 7 9 0 |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasDspr2StridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const double * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| double * | AP, | ||
| hipblasStride | strideA, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
spr2StridedBatched performs the matrix-vector operation
A_i := A_i + alpha*x_i*y_i**T + alpha*y_i*x_i**T
where alpha is a scalar, x_i amd y_i are vectors, and A_i is an n by n symmetric matrix, supplied in packed form, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied in AP. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied in AP. |
| [in] | n | [int] the number of rows and columns of each matrix A_i, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer pointing to the first vector (x_1). |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). |
| [in] | y | device pointer pointing to the first vector (y_1). |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1). |
| [in,out] | AP | device pointer storing the packed version of the specified triangular portion of each symmetric matrix A_i. Points to the first A_1. if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of each symmetric matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(0,1) AP(2) = A(1,1), etc. Ex: (HIPBLAS_FILL_MODE_UPPER; n = 4) 1 2 4 7 2 3 5 8 --—> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] 4 5 6 9 7 8 9 0 if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of each symmetric matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(1,0) AP(n) = A(2,1), etc. Ex: (HIPBLAS_FILL_MODE_LOWER; n = 4) 1 2 3 4 2 5 6 7 --—> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] 3 6 8 9 4 7 9 0 |
| [in] | strideA | [hipblasStride] stride from the start of one (A_i) and the next (A_i+1) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasDsprBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double *const | x[], | ||
| int | incx, | ||
| double *const | AP[], | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
sprBatched performs the matrix-vector operations
A_i := A_i + alpha*x_i*x_i**T
where alpha is a scalar, x_i is a vector, and A_i is an n by n symmetric matrix, supplied in packed form, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied in AP. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied in AP. |
| [in] | n | [int] the number of rows and columns of each matrix A_i, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in,out] | AP | device array of device pointers storing the packed version of the specified triangular portion of each symmetric matrix A_i of at least size ((n * (n + 1)) / 2). Array is of at least size batchCount. if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of each symmetric matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(0,1) AP(2) = A(1,1), etc. Ex: (HIPBLAS_FILL_MODE_UPPER; n = 4) 1 2 4 7 2 3 5 8 --—> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] 4 5 6 9 7 8 9 0 if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of each symmetric matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(1,0) AP(2) = A(2,1), etc. Ex: (HIPBLAS_FILL_MODE_LOWER; n = 4) 1 2 3 4 2 5 6 7 --—> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] 3 6 8 9 4 7 9 0 |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasDsprStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| double * | AP, | ||
| hipblasStride | strideA, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
sprStridedBatched performs the matrix-vector operations
A_i := A_i + alpha*x_i*x_i**T
where alpha is a scalar, x_i is a vector, and A_i is an n by n symmetric matrix, supplied in packed form, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied in AP. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied in AP. |
| [in] | n | [int] the number of rows and columns of each matrix A_i, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer pointing to the first vector (x_1). |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). |
| [in,out] | AP | device pointer storing the packed version of the specified triangular portion of each symmetric matrix A_i. Points to the first A_1. if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of each symmetric matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(0,1) AP(2) = A(1,1), etc. Ex: (HIPBLAS_FILL_MODE_UPPER; n = 4) 1 2 4 7 2 3 5 8 --—> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] 4 5 6 9 7 8 9 0 if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of each symmetric matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(1,0) AP(2) = A(2,1), etc. Ex: (HIPBLAS_FILL_MODE_LOWER; n = 4) 1 2 3 4 2 5 6 7 --—> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] 3 6 8 9 4 7 9 0 |
| [in] | strideA | [hipblasStride] stride from the start of one (A_i) and the next (A_i+1) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasDswap | ( | hipblasHandle_t | handle, |
| int | n, | ||
| double * | x, | ||
| int | incx, | ||
| double * | y, | ||
| int | incy | ||
| ) |
BLAS Level 1 API.
swap interchanges vectors x and y.
y := x; x := y
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x and y. |
| [in,out] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in,out] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| hipblasStatus_t hipblasDswapBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| double *const | x[], | ||
| int | incx, | ||
| double *const | y[], | ||
| int | incy, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
swapBatched interchanges vectors x_i and y_i, for i = 1 , ... , batchCount
y_i := x_i; x_i := y_i
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i and y_i. |
| [in,out] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in,out] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasDswapStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| double * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| double * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
swapStridedBatched interchanges vectors x_i and y_i, for i = 1 , ... , batchCount
y_i := x_i; x_i := y_i
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i and y_i. |
| [in,out] | x | device pointer to the first vector x_1. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stride_x, however the user should take care to ensure that stride_x is of appropriate size, for a typical case this means stride_x >= n * incx. |
| [in,out] | y | device pointer to the first vector y_1. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1). There are no restrictions placed on stride_x, however the user should take care to ensure that stride_y is of appropriate size, for a typical case this means stride_y >= n * incy. stridey should be non zero. |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasDsymm | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| int | m, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double * | AP, | ||
| int | lda, | ||
| const double * | BP, | ||
| int | ldb, | ||
| const double * | beta, | ||
| double * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
symm performs one of the matrix-matrix operations:
C := alpha*A*B + beta*C if side == HIPBLAS_SIDE_LEFT, C := alpha*B*A + beta*C if side == HIPBLAS_SIDE_RIGHT,
where alpha and beta are scalars, B and C are m by n matrices, and A is a symmetric matrix stored as either upper or lower.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] HIPBLAS_SIDE_LEFT: C := alpha*A*B + beta*C HIPBLAS_SIDE_RIGHT: C := alpha*B*A + beta*C |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix |
| [in] | m | [int] m specifies the number of rows of B and C. m >= 0. |
| [in] | n | [int] n specifies the number of columns of B and C. n >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A and B are not referenced. |
| [in] | AP | pointer storing matrix A on the GPU. A is m by m if side == HIPBLAS_SIDE_LEFT A is n by n if side == HIPBLAS_SIDE_RIGHT only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of A. if side = HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), otherwise lda >= max( 1, n ). |
| [in] | BP | pointer storing matrix B on the GPU. Matrix dimension is m by n |
| [in] | ldb | [int] ldb specifies the first dimension of B. ldb >= max( 1, m ) |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | pointer storing matrix C on the GPU. Matrix dimension is m by n |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, m ) |
| hipblasStatus_t hipblasDsymmBatched | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| int | m, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double *const | AP[], | ||
| int | lda, | ||
| const double *const | BP[], | ||
| int | ldb, | ||
| const double * | beta, | ||
| double *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
symmBatched performs a batch of the matrix-matrix operations:
C_i := alpha*A_i*B_i + beta*C_i if side == HIPBLAS_SIDE_LEFT, C_i := alpha*B_i*A_i + beta*C_i if side == HIPBLAS_SIDE_RIGHT,
where alpha and beta are scalars, B_i and C_i are m by n matrices, and A_i is a symmetric matrix stored as either upper or lower.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] HIPBLAS_SIDE_LEFT: C_i := alpha*A_i*B_i + beta*C_i HIPBLAS_SIDE_RIGHT: C_i := alpha*B_i*A_i + beta*C_i |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: A_i is a lower triangular matrix |
| [in] | m | [int] m specifies the number of rows of B_i and C_i. m >= 0. |
| [in] | n | [int] n specifies the number of columns of B_i and C_i. n >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A_i and B_i are not referenced. |
| [in] | AP | device array of device pointers storing each matrix A_i on the GPU. A_i is m by m if side == HIPBLAS_SIDE_LEFT A_i is n by n if side == HIPBLAS_SIDE_RIGHT only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of A_i. if side = HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), otherwise lda >= max( 1, n ). |
| [in] | BP | device array of device pointers storing each matrix B_i on the GPU. Matrix dimension is m by n |
| [in] | ldb | [int] ldb specifies the first dimension of B_i. ldb >= max( 1, m ) |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C_i need not be set before entry. |
| [in] | CP | device array of device pointers storing each matrix C_i on the GPU. Matrix dimension is m by n |
| [in] | ldc | [int] ldc specifies the first dimension of C_i. ldc >= max( 1, m ) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasDsymmStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| int | m, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const double * | BP, | ||
| int | ldb, | ||
| hipblasStride | strideB, | ||
| const double * | beta, | ||
| double * | CP, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
symmStridedBatched performs a batch of the matrix-matrix operations:
C_i := alpha*A_i*B_i + beta*C_i if side == HIPBLAS_SIDE_LEFT, C_i := alpha*B_i*A_i + beta*C_i if side == HIPBLAS_SIDE_RIGHT,
where alpha and beta are scalars, B_i and C_i are m by n matrices, and A_i is a symmetric matrix stored as either upper or lower.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] HIPBLAS_SIDE_LEFT: C_i := alpha*A_i*B_i + beta*C_i HIPBLAS_SIDE_RIGHT: C_i := alpha*B_i*A_i + beta*C_i |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: A_i is a lower triangular matrix |
| [in] | m | [int] m specifies the number of rows of B_i and C_i. m >= 0. |
| [in] | n | [int] n specifies the number of columns of B_i and C_i. n >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A_i and B_i are not referenced. |
| [in] | AP | device pointer to first matrix A_1 A_i is m by m if side == HIPBLAS_SIDE_LEFT A_i is n by n if side == HIPBLAS_SIDE_RIGHT only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of A_i. if side = HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), otherwise lda >= max( 1, n ). |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | BP | device pointer to first matrix B_1 of dimension (ldb, n) on the GPU. |
| [in] | ldb | [int] ldb specifies the first dimension of B_i. ldb >= max( 1, m ) |
| [in] | strideB | [hipblasStride] stride from the start of one matrix (B_i) and the next one (B_i+1) |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | device pointer to first matrix C_1 of dimension (ldc, n) on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, m ). |
| [in,out] | strideC | [hipblasStride] stride from the start of one matrix (C_i) and the next one (C_i+1) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasDsymv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double * | AP, | ||
| int | lda, | ||
| const double * | x, | ||
| int | incx, | ||
| const double * | beta, | ||
| double * | y, | ||
| int | incy | ||
| ) |
BLAS Level 2 API.
symv performs the matrix-vector operation:
y := alpha*A*x + beta*y,
where alpha and beta are scalars, x and y are n element vectors and A should contain an upper or lower triangular n by n symmetric matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] |
| [in] | alpha | specifies the scalar alpha |
| [in] | AP | pointer storing matrix A on the GPU |
| [in] | lda | [int] specifies the leading dimension of A |
| [in] | x | pointer storing vector x on the GPU |
| [in] | incx | [int] specifies the increment for the elements of x |
| [in] | beta | specifies the scalar beta |
| [out] | y | pointer storing vector y on the GPU |
| [in] | incy | [int] specifies the increment for the elements of y |
| hipblasStatus_t hipblasDsymvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double *const | AP[], | ||
| int | lda, | ||
| const double *const | x[], | ||
| int | incx, | ||
| const double * | beta, | ||
| double *const | y[], | ||
| int | incy, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
symvBatched performs the matrix-vector operation:
y_i := alpha*A_i*x_i + beta*y_i,
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha and beta are scalars, x_i and y_i are vectors and A_i is an n by n symmetric matrix, for i = 1, ..., batchCount. A a should contain an upper or lower triangular symmetric matrix and the opposing triangular part of A is not referenced
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] number of rows and columns of each matrix A_i |
| [in] | alpha | device pointer or host pointer to scalar alpha |
| [in] | AP | device array of device pointers storing each matrix A_i |
| [in] | lda | [int] specifies the leading dimension of each matrix A_i |
| [in] | x | device array of device pointers storing each vector x_i |
| [in] | incx | [int] specifies the increment for the elements of each vector x_i |
| [in] | beta | device pointer or host pointer to scalar beta |
| [out] | y | device array of device pointers storing each vector y_i |
| [in] | incy | [int] specifies the increment for the elements of each vector y_i |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasDsymvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const double * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const double * | beta, | ||
| double * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
symvStridedBatched performs the matrix-vector operation:
y_i := alpha*A_i*x_i + beta*y_i,
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha and beta are scalars, x_i and y_i are vectors and A_i is an n by n symmetric matrix, for i = 1, ..., batchCount. A a should contain an upper or lower triangular symmetric matrix and the opposing triangular part of A is not referenced
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] number of rows and columns of each matrix A_i |
| [in] | alpha | device pointer or host pointer to scalar alpha |
| [in] | AP | Device pointer to the first matrix A_1 on the GPU |
| [in] | lda | [int] specifies the leading dimension of each matrix A_i |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | x | Device pointer to the first vector x_1 on the GPU |
| [in] | incx | [int] specifies the increment for the elements of each vector x_i |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stridex, however the user should take care to ensure that stridex is of appropriate size. This typically means stridex >= n * incx. stridex should be non zero. |
| [in] | beta | device pointer or host pointer to scalar beta |
| [out] | y | Device pointer to the first vector y_1 on the GPU |
| [in] | incy | [int] specifies the increment for the elements of each vector y_i |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1). There are no restrictions placed on stridey, however the user should take care to ensure that stridey is of appropriate size. This typically means stridey >= n * incy. stridey should be non zero. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasDsyr | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double * | x, | ||
| int | incx, | ||
| double * | AP, | ||
| int | lda | ||
| ) |
BLAS Level 2 API.
syr performs the matrix-vector operations
A := A + alpha*x*x**T
where alpha is a scalar, x is a vector, and A is an n by n symmetric matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] the number of rows and columns of matrix A. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in,out] | AP | device pointer storing matrix A. |
| [in] | lda | [int] specifies the leading dimension of A. |
| hipblasStatus_t hipblasDsyr2 | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double * | x, | ||
| int | incx, | ||
| const double * | y, | ||
| int | incy, | ||
| double * | AP, | ||
| int | lda | ||
| ) |
BLAS Level 2 API.
syr2 performs the matrix-vector operations
A := A + alpha*x*y**T + alpha*y*x**T
where alpha is a scalar, x and y are vectors, and A is an n by n symmetric matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] the number of rows and columns of matrix A. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in,out] | AP | device pointer storing matrix A. |
| [in] | lda | [int] specifies the leading dimension of A. |
| hipblasStatus_t hipblasDsyr2Batched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double *const | x[], | ||
| int | incx, | ||
| const double *const | y[], | ||
| int | incy, | ||
| double *const | AP[], | ||
| int | lda, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
syr2Batched performs a batch of matrix-vector operations
A[i] := A[i] + alpha*x[i]*y[i]**T + alpha*y[i]*x[i]**T
where alpha is a scalar, x[i] and y[i] are vectors, and A[i] is a n by n symmetric matrix, for i = 1 , ... , batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] the number of rows and columns of matrix A. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in,out] | AP | device array of device pointers storing each matrix A_i. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasDsyr2k | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const double * | alpha, | ||
| const double * | AP, | ||
| int | lda, | ||
| const double * | BP, | ||
| int | ldb, | ||
| const double * | beta, | ||
| double * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
syr2k performs one of the matrix-matrix operations for a symmetric rank-2k update
C := alpha*(op( A )*op( B )^T + op( B )*op( A )^T) + beta*C
where alpha and beta are scalars, op(A) and op(B) are n by k matrix, and C is a symmetric n x n matrix stored as either upper or lower.
op( A ) = A, op( B ) = B, and A and B are n by k if trans == HIPBLAS_OP_N op( A ) = A^T, op( B ) = B^T, and A and B are k by n if trans == HIPBLAS_OP_T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_T: op( A ) = A^T, op( B ) = B^T HIPBLAS_OP_N: op( A ) = A, op( B ) = B |
| [in] | n | [int] n specifies the number of rows and columns of C. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A) and op(B). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | pointer storing matrix A on the GPU. Martrix dimension is ( lda, k ) when if trans = HIPBLAS_OP_N, otherwise (lda, n) only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of A. if trans = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | BP | pointer storing matrix B on the GPU. Martrix dimension is ( ldb, k ) when if trans = HIPBLAS_OP_N, otherwise (ldb, n) only the upper/lower triangular part is accessed. |
| [in] | ldb | [int] ldb specifies the first dimension of B. if trans = HIPBLAS_OP_N, ldb >= max( 1, n ), otherwise ldb >= max( 1, k ). |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | pointer storing matrix C on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| hipblasStatus_t hipblasDsyr2kBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const double * | alpha, | ||
| const double *const | AP[], | ||
| int | lda, | ||
| const double *const | BP[], | ||
| int | ldb, | ||
| const double * | beta, | ||
| double *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
syr2kBatched performs a batch of the matrix-matrix operations for a symmetric rank-2k update
C_i := alpha*(op( A_i )*op( B_i )^T + op( B_i )*op( A_i )^T) + beta*C_i
where alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrix, and C_i is a symmetric n x n matrix stored as either upper or lower.
op( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == HIPBLAS_OP_N op( A_i ) = A_i^T, op( B_i ) = B_i^T, and A_i and B_i are k by n if trans == HIPBLAS_OP_T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C_i is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_T: op( A_i ) = A_i^T, op( B_i ) = B_i^T HIPBLAS_OP_N: op( A_i ) = A_i, op( B_i ) = B_i |
| [in] | n | [int] n specifies the number of rows and columns of C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | device array of device pointers storing each matrix_i A of dimension (lda, k) when trans is HIPBLAS_OP_N, otherwise of dimension (lda, n) |
| [in] | lda | [int] lda specifies the first dimension of A_i. if trans = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | BP | device array of device pointers storing each matrix_i B of dimension (ldb, k) when trans is HIPBLAS_OP_N, otherwise of dimension (ldb, n) |
| [in] | ldb | [int] ldb specifies the first dimension of B. if trans = HIPBLAS_OP_N, ldb >= max( 1, n ), otherwise ldb >= max( 1, k ). |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | device array of device pointers storing each matrix C_i on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasDsyr2kStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const double * | alpha, | ||
| const double * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const double * | BP, | ||
| int | ldb, | ||
| hipblasStride | strideB, | ||
| const double * | beta, | ||
| double * | CP, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
syr2kStridedBatched performs a batch of the matrix-matrix operations for a symmetric rank-2k update
C_i := alpha*(op( A_i )*op( B_i )^T + op( B_i )*op( A_i )^T) + beta*C_i
where alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrix, and C_i is a symmetric n x n matrix stored as either upper or lower.
op( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == HIPBLAS_OP_N op( A_i ) = A_i^T, op( B_i ) = B_i^T, and A_i and B_i are k by n if trans == HIPBLAS_OP_T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C_i is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_T: op( A_i ) = A_i^T, op( B_i ) = B_i^T HIPBLAS_OP_N: op( A_i ) = A_i, op( B_i ) = B_i |
| [in] | n | [int] n specifies the number of rows and columns of C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | Device pointer to the first matrix A_1 on the GPU of dimension (lda, k) when trans is HIPBLAS_OP_N, otherwise of dimension (lda, n) |
| [in] | lda | [int] lda specifies the first dimension of A_i. if trans = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | BP | Device pointer to the first matrix B_1 on the GPU of dimension (ldb, k) when trans is HIPBLAS_OP_N, otherwise of dimension (ldb, n) |
| [in] | ldb | [int] ldb specifies the first dimension of B_i. if trans = HIPBLAS_OP_N, ldb >= max( 1, n ), otherwise ldb >= max( 1, k ). |
| [in] | strideB | [hipblasStride] stride from the start of one matrix (B_i) and the next one (B_i+1) |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | Device pointer to the first matrix C_1 on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| [in,out] | strideC | [hipblasStride] stride from the start of one matrix (C_i) and the next one (C_i+1) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasDsyr2StridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const double * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| double * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
syr2StridedBatched the matrix-vector operations
A[i] := A[i] + alpha*x[i]*y[i]**T + alpha*y[i]*x[i]**T
where alpha is a scalar, x[i] and y[i] are vectors, and A[i] is a n by n symmetric matrices, for i = 1 , ... , batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] the number of rows and columns of each matrix A. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer to the first vector x_1. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] specifies the pointer increment between vectors (x_i) and (x_i+1). |
| [in] | y | device pointer to the first vector y_1. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | stridey | [hipblasStride] specifies the pointer increment between vectors (y_i) and (y_i+1). |
| [in,out] | AP | device pointer to the first matrix A_1. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasDsyrBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double *const | x[], | ||
| int | incx, | ||
| double *const | AP[], | ||
| int | lda, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
syrBatched performs a batch of matrix-vector operations
A[i] := A[i] + alpha*x[i]*x[i]**T
where alpha is a scalar, x is an array of vectors, and A is an array of n by n symmetric matrices, for i = 1 , ... , batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] the number of rows and columns of matrix A. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in,out] | AP | device array of device pointers storing each matrix A_i. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasDsyrk | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const double * | alpha, | ||
| const double * | AP, | ||
| int | lda, | ||
| const double * | beta, | ||
| double * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
syrk performs one of the matrix-matrix operations for a symmetric rank-k update
C := alpha*op( A )*op( A )^T + beta*C
where alpha and beta are scalars, op(A) is an n by k matrix, and C is a symmetric n x n matrix stored as either upper or lower.
op( A ) = A, and A is n by k if transA == HIPBLAS_OP_N op( A ) = A^T and A is k by n if transA == HIPBLAS_OP_T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_T: op(A) = A^T HIPBLAS_OP_N: op(A) = A HIPBLAS_OP_C: op(A) = A^T |
HIPBLAS_OP_C is not supported for complex types, see cherk and zherk.
| [in] | n | [int] n specifies the number of rows and columns of C. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | pointer storing matrix A on the GPU. Martrix dimension is ( lda, k ) when if transA = HIPBLAS_OP_N, otherwise (lda, n) only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of A. if transA = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | pointer storing matrix C on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| hipblasStatus_t hipblasDsyrkBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const double * | alpha, | ||
| const double *const | AP[], | ||
| int | lda, | ||
| const double * | beta, | ||
| double *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
syrkBatched performs a batch of the matrix-matrix operations for a symmetric rank-k update
C_i := alpha*op( A_i )*op( A_i )^T + beta*C_i
where alpha and beta are scalars, op(A_i) is an n by k matrix, and C_i is a symmetric n x n matrix stored as either upper or lower.
op( A_i ) = A_i, and A_i is n by k if transA == HIPBLAS_OP_N op( A_i ) = A_i^T and A_i is k by n if transA == HIPBLAS_OP_T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C_i is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_T: op(A) = A^T HIPBLAS_OP_N: op(A) = A HIPBLAS_OP_C: op(A) = A^T |
HIPBLAS_OP_C is not supported for complex types, see cherk and zherk.
| [in] | n | [int] n specifies the number of rows and columns of C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | device array of device pointers storing each matrix_i A of dimension (lda, k) when transA is HIPBLAS_OP_N, otherwise of dimension (lda, n) |
| [in] | lda | [int] lda specifies the first dimension of A_i. if transA = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | device array of device pointers storing each matrix C_i on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasDsyrkStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const double * | alpha, | ||
| const double * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const double * | beta, | ||
| double * | CP, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
syrkStridedBatched performs a batch of the matrix-matrix operations for a symmetric rank-k update
C_i := alpha*op( A_i )*op( A_i )^T + beta*C_i
where alpha and beta are scalars, op(A_i) is an n by k matrix, and C_i is a symmetric n x n matrix stored as either upper or lower.
op( A_i ) = A_i, and A_i is n by k if transA == HIPBLAS_OP_N op( A_i ) = A_i^T and A_i is k by n if transA == HIPBLAS_OP_T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C_i is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_T: op(A) = A^T HIPBLAS_OP_N: op(A) = A HIPBLAS_OP_C: op(A) = A^T |
HIPBLAS_OP_C is not supported for complex types, see cherk and zherk.
| [in] | n | [int] n specifies the number of rows and columns of C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | Device pointer to the first matrix A_1 on the GPU of dimension (lda, k) when transA is HIPBLAS_OP_N, otherwise of dimension (lda, n) |
| [in] | lda | [int] lda specifies the first dimension of A_i. if transA = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | Device pointer to the first matrix C_1 on the GPU. on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| [in,out] | strideC | [hipblasStride] stride from the start of one matrix (C_i) and the next one (C_i+1) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasDsyrkx | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const double * | alpha, | ||
| const double * | AP, | ||
| int | lda, | ||
| const double * | BP, | ||
| int | ldb, | ||
| const double * | beta, | ||
| double * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
syrkx performs one of the matrix-matrix operations for a symmetric rank-k update
C := alpha*op( A )*op( B )^T + beta*C
where alpha and beta are scalars, op(A) and op(B) are n by k matrix, and C is a symmetric n x n matrix stored as either upper or lower. This routine should only be used when the caller can guarantee that the result of op( A )*op( B )^T will be symmetric.
op( A ) = A, op( B ) = B, and A and B are n by k if trans == HIPBLAS_OP_N op( A ) = A^T, op( B ) = B^T, and A and B are k by n if trans == HIPBLAS_OP_T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_T: op( A ) = A^T, op( B ) = B^T HIPBLAS_OP_N: op( A ) = A, op( B ) = B |
| [in] | n | [int] n specifies the number of rows and columns of C. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A) and op(B). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | pointer storing matrix A on the GPU. Martrix dimension is ( lda, k ) when if trans = HIPBLAS_OP_N, otherwise (lda, n) only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of A. if trans = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | BP | pointer storing matrix B on the GPU. Martrix dimension is ( ldb, k ) when if trans = HIPBLAS_OP_N, otherwise (ldb, n) only the upper/lower triangular part is accessed. |
| [in] | ldb | [int] ldb specifies the first dimension of B. if trans = HIPBLAS_OP_N, ldb >= max( 1, n ), otherwise ldb >= max( 1, k ). |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | pointer storing matrix C on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| hipblasStatus_t hipblasDsyrkxBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const double * | alpha, | ||
| const double *const | AP[], | ||
| int | lda, | ||
| const double *const | BP[], | ||
| int | ldb, | ||
| const double * | beta, | ||
| double *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
syrkxBatched performs a batch of the matrix-matrix operations for a symmetric rank-k update
C_i := alpha*op( A_i )*op( B_i )^T + beta*C_i
where alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrix, and C_i is a symmetric n x n matrix stored as either upper or lower. This routine should only be used when the caller can guarantee that the result of op( A_i )*op( B_i )^T will be symmetric.
op( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == HIPBLAS_OP_N op( A_i ) = A_i^T, op( B_i ) = B_i^T, and A_i and B_i are k by n if trans == HIPBLAS_OP_T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C_i is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_T: op( A_i ) = A_i^T, op( B_i ) = B_i^T HIPBLAS_OP_N: op( A_i ) = A_i, op( B_i ) = B_i |
| [in] | n | [int] n specifies the number of rows and columns of C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | device array of device pointers storing each matrix_i A of dimension (lda, k) when trans is HIPBLAS_OP_N, otherwise of dimension (lda, n) |
| [in] | lda | [int] lda specifies the first dimension of A_i. if trans = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | BP | device array of device pointers storing each matrix_i B of dimension (ldb, k) when trans is HIPBLAS_OP_N, otherwise of dimension (ldb, n) |
| [in] | ldb | [int] ldb specifies the first dimension of B. if trans = HIPBLAS_OP_N, ldb >= max( 1, n ), otherwise ldb >= max( 1, k ). |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | device array of device pointers storing each matrix C_i on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasDsyrkxStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const double * | alpha, | ||
| const double * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const double * | BP, | ||
| int | ldb, | ||
| hipblasStride | strideB, | ||
| const double * | beta, | ||
| double * | CP, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
syrkxStridedBatched performs a batch of the matrix-matrix operations for a symmetric rank-k update
C_i := alpha*op( A_i )*op( B_i )^T + beta*C_i
where alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrix, and C_i is a symmetric n x n matrix stored as either upper or lower. This routine should only be used when the caller can guarantee that the result of op( A_i )*op( B_i )^T will be symmetric.
op( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == HIPBLAS_OP_N op( A_i ) = A_i^T, op( B_i ) = B_i^T, and A_i and B_i are k by n if trans == HIPBLAS_OP_T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C_i is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_T: op( A_i ) = A_i^T, op( B_i ) = B_i^T HIPBLAS_OP_N: op( A_i ) = A_i, op( B_i ) = B_i |
| [in] | n | [int] n specifies the number of rows and columns of C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | Device pointer to the first matrix A_1 on the GPU of dimension (lda, k) when trans is HIPBLAS_OP_N, otherwise of dimension (lda, n) |
| [in] | lda | [int] lda specifies the first dimension of A_i. if trans = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | BP | Device pointer to the first matrix B_1 on the GPU of dimension (ldb, k) when trans is HIPBLAS_OP_N, otherwise of dimension (ldb, n) |
| [in] | ldb | [int] ldb specifies the first dimension of B_i. if trans = HIPBLAS_OP_N, ldb >= max( 1, n ), otherwise ldb >= max( 1, k ). |
| [in] | strideB | [hipblasStride] stride from the start of one matrix (B_i) and the next one (B_i+1) |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | Device pointer to the first matrix C_1 on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| [in,out] | strideC | [hipblasStride] stride from the start of one matrix (C_i) and the next one (C_i+1) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasDsyrStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| double * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
syrStridedBatched performs the matrix-vector operations
A[i] := A[i] + alpha*x[i]*x[i]**T
where alpha is a scalar, vectors, and A is an array of n by n symmetric matrices, for i = 1 , ... , batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] the number of rows and columns of each matrix A. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer to the first vector x_1. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] specifies the pointer increment between vectors (x_i) and (x_i+1). |
| [in,out] | AP | device pointer to the first matrix A_1. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasDtbmv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | k, | ||
| const double * | AP, | ||
| int | lda, | ||
| double * | x, | ||
| int | incx | ||
| ) |
BLAS Level 2 API.
tbmv performs one of the matrix-vector operations
x := A*x or x := A**T*x or x := A**H*x,
x is a vectors and A is a banded m by m matrix (see description below).
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper banded triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower banded triangular matrix. |
| [in] | transA | [hipblasOperation_t] indicates whether matrix A is tranposed (conjugated) or not. |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: The main diagonal of A is assumed to consist of only 1's and is not referenced. HIPBLAS_DIAG_NON_UNIT: No assumptions are made of A's main diagonal. |
| [in] | m | [int] the number of rows and columns of the matrix represented by A. |
| [in] | k | [int] if uplo == HIPBLAS_FILL_MODE_UPPER, k specifies the number of super-diagonals of the matrix A. if uplo == HIPBLAS_FILL_MODE_LOWER, k specifies the number of sub-diagonals of the matrix A. k must satisfy k > 0 && k < lda. |
| [in] | AP | device pointer storing banded triangular matrix A. if uplo == HIPBLAS_FILL_MODE_UPPER: The matrix represented is an upper banded triangular matrix with the main diagonal and k super-diagonals, everything else can be assumed to be 0. The matrix is compacted so that the main diagonal resides on the k'th row, the first super diagonal resides on the RHS of the k-1'th row, etc, with the k'th diagonal on the RHS of the 0'th row. Ex: (HIPBLAS_FILL_MODE_UPPER; m = 5; k = 2) 1 6 9 0 0 0 0 9 8 7 0 2 7 8 0 0 6 7 8 9 0 0 3 8 7 -—> 1 2 3 4 5 0 0 0 4 9 0 0 0 0 0 0 0 0 0 5 0 0 0 0 0 if uplo == HIPBLAS_FILL_MODE_LOWER: The matrix represnted is a lower banded triangular matrix with the main diagonal and k sub-diagonals, everything else can be assumed to be 0. The matrix is compacted so that the main diagonal resides on the 0'th row, working up to the k'th diagonal residing on the LHS of the k'th row. Ex: (HIPBLAS_FILL_MODE_LOWER; m = 5; k = 2) 1 0 0 0 0 1 2 3 4 5 6 2 0 0 0 6 7 8 9 0 9 7 3 0 0 -—> 9 8 7 0 0 0 8 8 4 0 0 0 0 0 0 0 0 7 9 5 0 0 0 0 0 |
| [in] | lda | [int] specifies the leading dimension of A. lda must satisfy lda > k. |
| [in,out] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| hipblasStatus_t hipblasDtbmvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | k, | ||
| const double *const | AP[], | ||
| int | lda, | ||
| double *const | x[], | ||
| int | incx, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
tbmvBatched performs one of the matrix-vector operations
x_i := A_i*x_i or x_i := A_i**T*x_i or x_i := A_i**H*x_i,
where (A_i, x_i) is the i-th instance of the batch. x_i is a vector and A_i is an m by m matrix, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: each A_i is an upper banded triangular matrix. HIPBLAS_FILL_MODE_LOWER: each A_i is a lower banded triangular matrix. |
| [in] | transA | [hipblasOperation_t] indicates whether each matrix A_i is tranposed (conjugated) or not. |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: The main diagonal of each A_i is assumed to consist of only 1's and is not referenced. HIPBLAS_DIAG_NON_UNIT: No assumptions are made of each A_i's main diagonal. |
| [in] | m | [int] the number of rows and columns of the matrix represented by each A_i. |
| [in] | k | [int] if uplo == HIPBLAS_FILL_MODE_UPPER, k specifies the number of super-diagonals of each matrix A_i. if uplo == HIPBLAS_FILL_MODE_LOWER, k specifies the number of sub-diagonals of each matrix A_i. k must satisfy k > 0 && k < lda. |
| [in] | AP | device array of device pointers storing each banded triangular matrix A_i. if uplo == HIPBLAS_FILL_MODE_UPPER: The matrix represented is an upper banded triangular matrix with the main diagonal and k super-diagonals, everything else can be assumed to be 0. The matrix is compacted so that the main diagonal resides on the k'th row, the first super diagonal resides on the RHS of the k-1'th row, etc, with the k'th diagonal on the RHS of the 0'th row. Ex: (HIPBLAS_FILL_MODE_UPPER; m = 5; k = 2) 1 6 9 0 0 0 0 9 8 7 0 2 7 8 0 0 6 7 8 9 0 0 3 8 7 -—> 1 2 3 4 5 0 0 0 4 9 0 0 0 0 0 0 0 0 0 5 0 0 0 0 0 if uplo == HIPBLAS_FILL_MODE_LOWER: The matrix represnted is a lower banded triangular matrix with the main diagonal and k sub-diagonals, everything else can be assumed to be 0. The matrix is compacted so that the main diagonal resides on the 0'th row, working up to the k'th diagonal residing on the LHS of the k'th row. Ex: (HIPBLAS_FILL_MODE_LOWER; m = 5; k = 2) 1 0 0 0 0 1 2 3 4 5 6 2 0 0 0 6 7 8 9 0 9 7 3 0 0 -—> 9 8 7 0 0 0 8 8 4 0 0 0 0 0 0 0 0 7 9 5 0 0 0 0 0 |
| [in] | lda | [int] specifies the leading dimension of each A_i. lda must satisfy lda > k. |
| [in,out] | x | device array of device pointer storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasDtbmvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | k, | ||
| const double * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| double * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
tbmvStridedBatched performs one of the matrix-vector operations
x_i := A_i*x_i or x_i := A_i**T*x_i or x_i := A_i**H*x_i,
where (A_i, x_i) is the i-th instance of the batch. x_i is a vector and A_i is an m by m matrix, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: each A_i is an upper banded triangular matrix. HIPBLAS_FILL_MODE_LOWER: each A_i is a lower banded triangular matrix. |
| [in] | transA | [hipblasOperation_t] indicates whether each matrix A_i is tranposed (conjugated) or not. |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: The main diagonal of each A_i is assumed to consist of only 1's and is not referenced. HIPBLAS_DIAG_NON_UNIT: No assumptions are made of each A_i's main diagonal. |
| [in] | m | [int] the number of rows and columns of the matrix represented by each A_i. |
| [in] | k | [int] if uplo == HIPBLAS_FILL_MODE_UPPER, k specifies the number of super-diagonals of each matrix A_i. if uplo == HIPBLAS_FILL_MODE_LOWER, k specifies the number of sub-diagonals of each matrix A_i. k must satisfy k > 0 && k < lda. |
| [in] | AP | device array to the first matrix A_i of the batch. Stores each banded triangular matrix A_i. if uplo == HIPBLAS_FILL_MODE_UPPER: The matrix represented is an upper banded triangular matrix with the main diagonal and k super-diagonals, everything else can be assumed to be 0. The matrix is compacted so that the main diagonal resides on the k'th row, the first super diagonal resides on the RHS of the k-1'th row, etc, with the k'th diagonal on the RHS of the 0'th row. Ex: (HIPBLAS_FILL_MODE_UPPER; m = 5; k = 2) 1 6 9 0 0 0 0 9 8 7 0 2 7 8 0 0 6 7 8 9 0 0 3 8 7 -—> 1 2 3 4 5 0 0 0 4 9 0 0 0 0 0 0 0 0 0 5 0 0 0 0 0 if uplo == HIPBLAS_FILL_MODE_LOWER: The matrix represnted is a lower banded triangular matrix with the main diagonal and k sub-diagonals, everything else can be assumed to be 0. The matrix is compacted so that the main diagonal resides on the 0'th row, working up to the k'th diagonal residing on the LHS of the k'th row. Ex: (HIPBLAS_FILL_MODE_LOWER; m = 5; k = 2) 1 0 0 0 0 1 2 3 4 5 6 2 0 0 0 6 7 8 9 0 9 7 3 0 0 -—> 9 8 7 0 0 0 8 8 4 0 0 0 0 0 0 0 0 7 9 5 0 0 0 0 0 |
| [in] | lda | [int] specifies the leading dimension of each A_i. lda must satisfy lda > k. |
| [in] | strideA | [hipblasStride] stride from the start of one A_i matrix to the next A_(i + 1). |
| [in,out] | x | device array to the first vector x_i of the batch. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one x_i matrix to the next x_(i + 1). |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasDtbsv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | n, | ||
| int | k, | ||
| const double * | AP, | ||
| int | lda, | ||
| double * | x, | ||
| int | incx | ||
| ) |
BLAS Level 2 API.
tbsv solves
A*x = b or A**T*x = b or A**H*x = b,
where x and b are vectors and A is a banded triangular matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: Solves A*x = b HIPBLAS_OP_T: Solves A**T*x = b HIPBLAS_OP_C: Solves A**H*x = b |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular (i.e. the diagonal elements of A are not used in computations). HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | n | [int] n specifies the number of rows of b. n >= 0. |
| [in] | k | [int] if(uplo == HIPBLAS_FILL_MODE_UPPER) k specifies the number of super-diagonals of A. if(uplo == HIPBLAS_FILL_MODE_LOWER) k specifies the number of sub-diagonals of A. k >= 0. |
| [in] | AP | device pointer storing the matrix A in banded format. |
| [in] | lda | [int] specifies the leading dimension of A. lda >= (k + 1). |
| [in,out] | x | device pointer storing input vector b. Overwritten by the output vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| hipblasStatus_t hipblasDtbsvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | n, | ||
| int | k, | ||
| const double *const | AP[], | ||
| int | lda, | ||
| double *const | x[], | ||
| int | incx, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
tbsvBatched solves
A_i*x_i = b_i or A_i**T*x_i = b_i or A_i**H*x_i = b_i,
where x_i and b_i are vectors and A_i is a banded triangular matrix, for i = [1, batchCount].
The input vectors b_i are overwritten by the output vectors x_i.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: Solves A_i*x_i = b_i HIPBLAS_OP_T: Solves A_i**T*x_i = b_i HIPBLAS_OP_C: Solves A_i**H*x_i = b_i |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: each A_i is assumed to be unit triangular (i.e. the diagonal elements of each A_i are not used in computations). HIPBLAS_DIAG_NON_UNIT: each A_i is not assumed to be unit triangular. |
| [in] | n | [int] n specifies the number of rows of each b_i. n >= 0. |
| [in] | k | [int] if(uplo == HIPBLAS_FILL_MODE_UPPER) k specifies the number of super-diagonals of each A_i. if(uplo == HIPBLAS_FILL_MODE_LOWER) k specifies the number of sub-diagonals of each A_i. k >= 0. |
| [in] | AP | device vector of device pointers storing each matrix A_i in banded format. |
| [in] | lda | [int] specifies the leading dimension of each A_i. lda >= (k + 1). |
| [in,out] | x | device vector of device pointers storing each input vector b_i. Overwritten by each output vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasDtbsvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | n, | ||
| int | k, | ||
| const double * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| double * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
tbsvStridedBatched solves
A_i*x_i = b_i or A_i**T*x_i = b_i or A_i**H*x_i = b_i,
where x_i and b_i are vectors and A_i is a banded triangular matrix, for i = [1, batchCount].
The input vectors b_i are overwritten by the output vectors x_i.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: Solves A_i*x_i = b_i HIPBLAS_OP_T: Solves A_i**T*x_i = b_i HIPBLAS_OP_C: Solves A_i**H*x_i = b_i |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: each A_i is assumed to be unit triangular (i.e. the diagonal elements of each A_i are not used in computations). HIPBLAS_DIAG_NON_UNIT: each A_i is not assumed to be unit triangular. |
| [in] | n | [int] n specifies the number of rows of each b_i. n >= 0. |
| [in] | k | [int] if(uplo == HIPBLAS_FILL_MODE_UPPER) k specifies the number of super-diagonals of each A_i. if(uplo == HIPBLAS_FILL_MODE_LOWER) k specifies the number of sub-diagonals of each A_i. k >= 0. |
| [in] | AP | device pointer pointing to the first banded matrix A_1. |
| [in] | lda | [int] specifies the leading dimension of each A_i. lda >= (k + 1). |
| [in] | strideA | [hipblasStride] specifies the distance between the start of one matrix (A_i) and the next (A_i+1). |
| [in,out] | x | device pointer pointing to the first input vector b_1. Overwritten by output vectors x. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] specifies the distance between the start of one vector (x_i) and the next (x_i+1). |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasDtpmv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const double * | AP, | ||
| double * | x, | ||
| int | incx | ||
| ) |
BLAS Level 2 API.
tpmv performs one of the matrix-vector operations
x = A*x or x = A**T*x,
where x is an n element vector and A is an n by n unit, or non-unit, upper or lower triangular matrix, supplied in the pack form.
The vector x is overwritten.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of A. m >= 0. |
| [in] | AP | device pointer storing matrix A, of dimension at leat ( m * ( m + 1 ) / 2 ). Before entry with uplo = HIPBLAS_FILL_MODE_UPPER, the array A must contain the upper triangular matrix packed sequentially, column by column, so that A[0] contains a_{0,0}, A[1] and A[2] contain a_{0,1} and a_{1, 1} respectively, and so on. Before entry with uplo = HIPBLAS_FILL_MODE_LOWER, the array A must contain the lower triangular matrix packed sequentially, column by column, so that A[0] contains a_{0,0}, A[1] and A[2] contain a_{1,0} and a_{2,0} respectively, and so on. Note that when DIAG = HIPBLAS_DIAG_UNIT, the diagonal elements of A are not referenced, but are assumed to be unity. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. incx must not be zero. |
| hipblasStatus_t hipblasDtpmvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const double *const | AP[], | ||
| double *const | x[], | ||
| int | incx, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
tpmvBatched performs one of the matrix-vector operations
x_i = A_i*x_i or x_i = A**T*x_i, 0 \le i < batchCount
where x_i is an n element vector and A_i is an n by n (unit, or non-unit, upper or lower triangular matrix)
The vectors x_i are overwritten.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A_i is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of matrices A_i. m >= 0. |
| [in] | AP | device pointer storing pointer of matrices A_i, of dimension ( lda, m ) |
| [in] | x | device pointer storing vectors x_i. |
| [in] | incx | [int] specifies the increment for the elements of vectors x_i. |
| [in] | batchCount | [int] The number of batched matrices/vectors. |
| hipblasStatus_t hipblasDtpmvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const double * | AP, | ||
| hipblasStride | strideA, | ||
| double * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
tpmvStridedBatched performs one of the matrix-vector operations
x_i = A_i*x_i or x_i = A**T*x_i, 0 \le i < batchCount
where x_i is an n element vector and A_i is an n by n (unit, or non-unit, upper or lower triangular matrix) with strides specifying how to retrieve $x_i$ (resp. $A_i$) from $x_{i-1}$ (resp. $A_i$).
The vectors x_i are overwritten.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A_i is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of matrices A_i. m >= 0. |
| [in] | AP | device pointer of the matrix A_0, of dimension ( lda, m ) |
| [in] | strideA | [hipblasStride] stride from the start of one A_i matrix to the next A_{i + 1} |
| [in] | x | device pointer storing the vector x_0. |
| [in] | incx | [int] specifies the increment for the elements of one vector x. |
| [in] | stridex | [hipblasStride] stride from the start of one x_i vector to the next x_{i + 1} |
| [in] | batchCount | [int] The number of batched matrices/vectors. |
| hipblasStatus_t hipblasDtpsv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const double * | AP, | ||
| double * | x, | ||
| int | incx | ||
| ) |
BLAS Level 2 API.
tpsv solves
A*x = b or A**T*x = b, or A**H*x = b,
where x and b are vectors and A is a triangular matrix stored in the packed format.
The input vector b is overwritten by the output vector x.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: Solves A*x = b HIPBLAS_OP_T: Solves A**T*x = b HIPBLAS_OP_C: Solves A**H*x = b |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular (i.e. the diagonal elements of A are not used in computations). HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of b. m >= 0. |
| [in] | AP | device pointer storing the packed version of matrix A, of dimension >= (n * (n + 1) / 2) |
| [in,out] | x | device pointer storing vector b on input, overwritten by x on output. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| hipblasStatus_t hipblasDtpsvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const double *const | AP[], | ||
| double *const | x[], | ||
| int | incx, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
tpsvBatched solves
A_i*x_i = b_i or A_i**T*x_i = b_i, or A_i**H*x_i = b_i,
where x_i and b_i are vectors and A_i is a triangular matrix stored in the packed format, for i in [1, batchCount].
The input vectors b_i are overwritten by the output vectors x_i.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: each A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: each A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: Solves A*x = b HIPBLAS_OP_T: Solves A**T*x = b HIPBLAS_OP_C: Solves A**H*x = b |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: each A_i is assumed to be unit triangular (i.e. the diagonal elements of each A_i are not used in computations). HIPBLAS_DIAG_NON_UNIT: each A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of each b_i. m >= 0. |
| [in] | AP | device array of device pointers storing the packed versions of each matrix A_i, of dimension >= (n * (n + 1) / 2) |
| [in,out] | x | device array of device pointers storing each input vector b_i, overwritten by x_i on output. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | batchCount | [int] specifies the number of instances in the batch. |
| hipblasStatus_t hipblasDtpsvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const double * | AP, | ||
| hipblasStride | strideA, | ||
| double * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
tpsvStridedBatched solves
A_i*x_i = b_i or A_i**T*x_i = b_i, or A_i**H*x_i = b_i,
where x_i and b_i are vectors and A_i is a triangular matrix stored in the packed format, for i in [1, batchCount].
The input vectors b_i are overwritten by the output vectors x_i.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: each A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: each A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: Solves A*x = b HIPBLAS_OP_T: Solves A**T*x = b HIPBLAS_OP_C: Solves A**H*x = b |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: each A_i is assumed to be unit triangular (i.e. the diagonal elements of each A_i are not used in computations). HIPBLAS_DIAG_NON_UNIT: each A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of each b_i. m >= 0. |
| [in] | AP | device pointer pointing to the first packed matrix A_1, of dimension >= (n * (n + 1) / 2) |
| [in] | strideA | [hipblasStride] stride from the beginning of one packed matrix (AP_i) and the next (AP_i+1). |
| [in,out] | x | device pointer pointing to the first input vector b_1. Overwritten by each x_i on output. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the beginning of one vector (x_i) and the next (x_i+1). |
| [in] | batchCount | [int] specifies the number of instances in the batch. |
| hipblasStatus_t hipblasDtrmm | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double * | AP, | ||
| int | lda, | ||
| double * | BP, | ||
| int | ldb | ||
| ) |
BLAS Level 3 API.
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasStrmm(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const float* alpha,
const float* AP,
int lda,
float* BP,
int ldb);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasStrmmOutofplace(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const float* alpha,
const float* AP,
int lda,
const float* BP,
int ldb,
float* CP,
int ldc);
#endif
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasDtrmm(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const double* alpha,
const double* AP,
int lda,
double* BP,
int ldb);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasDtrmmOutofplace(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const double* alpha,
const double* AP,
int lda,
const double* BP,
int ldb,
double* CP,
int ldc);
#endif
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasCtrmm(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasComplex* alpha,
const hipblasComplex* AP,
int lda,
hipblasComplex* BP,
int ldb);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasCtrmmOutofplace(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasComplex* alpha,
const hipblasComplex* AP,
int lda,
const hipblasComplex* BP,
int ldb,
hipblasComplex* CP,
int ldc);
#endif
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasZtrmm(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasDoubleComplex* alpha,
const hipblasDoubleComplex* AP,
int lda,
hipblasDoubleComplex* BP,
int ldb);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasZtrmmOutofplace(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasDoubleComplex* alpha,
const hipblasDoubleComplex* AP,
int lda,
const hipblasDoubleComplex* BP,
int ldb,
hipblasDoubleComplex* CP,
int ldc);
#endif
The deprecated Legacy BLAS in-place trmm performs one of the matrix-matrix operations:
B := alpha*op( A )*B, or B := alpha*B*op( A ),
The new trmm performs one of the matrix-matrix operations:
C := alpha*op( A )*B, or C := alpha*B*op( A ),
The in-place functionality is still available in the new trmmm by setting pointer C equal to pointer B, and ldc equal to ldb.
alpha is a scalar, B is an m by n matrix, C is an m by n matrix, A is a unit, or non-unit, upper or lower triangular matrix and op( A ) is one of op( A ) = A or op( A ) = A^T or op( A ) = A^H. When uplo == rocblas_fill_upper the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Here k is m when side == rocblas_side_left and is n when side == rocblas_side_right. When uplo == rocblas_fill_lower the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Here k is m when side == rocblas_side_left and is n when side == rocblas_side_right. Note that when diag == rocblas_diagonal_unit the diagonal elements of A are not referenced either, but are assumed to be unity.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] Specifies whether op(A) multiplies B from the left or right as follows: HIPBLAS_SIDE_LEFT: B := alpha*op( A )*B. HIPBLAS_SIDE_RIGHT: B := alpha*B*op( A ). |
| [in] | uplo | [hipblasFillMode_t] Specifies whether the matrix A is an upper or lower triangular matrix as follows: HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] Specifies the form of op(A) to be used in the matrix multiplication as follows: HIPBLAS_OP_N: op(A) = A. HIPBLAS_OP_T: op(A) = A^T. HIPBLAS_OP_C: op(A) = A^H. |
| [in] | diag | [hipblasDiagType_t] Specifies whether or not A is unit triangular as follows: HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of B. m >= 0. |
| [in] | n | [int] n specifies the number of columns of B. n >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. |
| [in] | AP | Device pointer to matrix A on the GPU. A has dimension ( lda, k ), where k is m when side == HIPBLAS_SIDE_LEFT and is n when side == HIPBLAS_SIDE_RIGHT. |
When uplo == HIPBLAS_FILL_MODE_UPPER the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced.
When uplo == HIPBLAS_FILL_MODE_LOWER the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced.
Note that when diag == HIPBLAS_DIAG_UNIT the diagonal elements of A are not referenced either, but are assumed to be unity.
| [in] | lda | [int] lda specifies the first dimension of A. if side == HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), if side == HIPBLAS_SIDE_RIGHT, lda >= max( 1, n ). |
| [in,out] | BP | Device pointer to the first matrix B_0 on the GPU. On entry, the leading m by n part of the array B must contain the matrix B, and on exit is overwritten by the transformed matrix. |
| [in] | ldb | [int] ldb specifies the first dimension of B. ldb >= max( 1, m ). |
| hipblasStatus_t hipblasDtrmmBatched | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double *const | AP[], | ||
| int | lda, | ||
| double *const | BP[], | ||
| int | ldb, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
The hipBLAS trmm_batched API is from Legacy BLAS and it supports only in-place functionality. It is deprecated and it will be replaced with an API that supports both in-place and out-of-place functionality. The new API is available in hipBLAS versions 1.x.x and later. To get the new API compile with the directive -DHIPBLAS_V1.
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasStrmmBatched(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const float* alpha,
const float* const AP[],
int lda,
float* const BP[],
int ldb,
int batchCount);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasStrmmBatchedOutofplace(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const float* alpha,
const float* const AP[],
int lda,
const float* const BP[],
int ldb,
float* const CP[],
int ldc,
int batchCount);
#endif
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasDtrmmBatched(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const double* alpha,
const double* const AP[],
int lda,
double* const BP[],
int ldb,
int batchCount);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasDtrmmBatchedOutofplace(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const double* alpha,
const double* const AP[],
int lda,
const double* const BP[],
int ldb,
double* const CP[],
int ldc,
int batchCount);
#endif
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasCtrmmBatched(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasComplex* alpha,
const hipblasComplex* const AP[],
int lda,
hipblasComplex* const BP[],
int ldb,
int batchCount);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasCtrmmBatchedOutofplace(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasComplex* alpha,
const hipblasComplex* const AP[],
int lda,
const hipblasComplex* const BP[],
int ldb,
hipblasComplex* const CP[],
int ldc,
int batchCount);
#endif
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasZtrmmBatched(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasDoubleComplex* alpha,
const hipblasDoubleComplex* const AP[],
int lda,
hipblasDoubleComplex* const BP[],
int ldb,
int batchCount);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasZtrmmBatchedOutofplace(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasDoubleComplex* alpha,
const hipblasDoubleComplex* const AP[],
int lda,
const hipblasDoubleComplex* const BP[],
int ldb,
hipblasDoubleComplex* const CP[],
int ldc,
int batchCount);
#endif
The deprecated Legacy BLAS in-place trmm_batched performs one of the batched matrix-matrix operations:
B_i := alpha*op( A_i )*B_i, or B_i := alpha*B_i*op( A_i ) for i = 0, 1, ... batch_count -1,
The new trmm_batched performs one of the matrix-matrix operations:
C_i := alpha*op( A_i )*B_i, or C_i := alpha*B_i*op( A_i ) for i = 0, 1, ... batch_count -1,
The in-place functionality is still available in the new trmmm_batched by setting pointer C equal to pointer B and ldc equal to ldb.
alpha is a scalar, B_i is an m by n matrix, C_i is an m by n matrix, A_i is a unit, or non-unit, upper or lower triangular matrix and op( A_i ) is one of op( A_i ) = A_i or op( A_i ) = A_i^T or op( A_i ) = A_i^H.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] Specifies whether op(A_i) multiplies B_i from the left or right as follows: HIPBLAS_SIDE_LEFT: B_i := alpha*op( A_i )*B_i. HIPBLAS_SIDE_RIGHT: B_i := alpha*B_i*op( A_i ). |
| [in] | uplo | [hipblasFillMode_t] Specifies whether the matrix A is an upper or lower triangular matrix as follows: HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] Specifies the form of op(A_i) to be used in the matrix multiplication as follows: HIPBLAS_OP_N: op(A_i) = A_i. HIPBLAS_OP_T: op(A_i) = A_i^T. HIPBLAS_OP_C: op(A_i) = A_i^H. |
| [in] | diag | [hipblasDiagType_t] Specifies whether or not A_i is unit triangular as follows: HIPBLAS_DIAG_UNIT: A_i is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of B_i. m >= 0. |
| [in] | n | [int] n specifies the number of columns of B_i. n >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A_i is not referenced and B_i need not be set before entry. |
| [in] | AP | Device array of device pointers storing each matrix A_i on the GPU. Each A_i is of dimension ( lda, k ), where k is m when side == HIPBLAS_SIDE_LEFT and is n when side == HIPBLAS_SIDE_RIGHT. |
When uplo == HIPBLAS_FILL_MODE_UPPER the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced.
When uplo == HIPBLAS_FILL_MODE_LOWER the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced.
Note that when diag == HIPBLAS_DIAG_UNIT the diagonal elements of A_i are not referenced either, but are assumed to be unity.
| [in] | lda | [int] lda specifies the first dimension of A. if side == HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), if side == HIPBLAS_SIDE_RIGHT, lda >= max( 1, n ). |
| [in,out] | BP | device array of device pointers storing each matrix B_i on the GPU. On entry, the leading m by n part of the array B_i must contain the matrix B_i, and on exit is overwritten by the transformed matrix. |
| [in] | ldb | [int] ldb specifies the first dimension of B_i. ldb >= max( 1, m ). |
| [in] | batchCount | [int] number of instances i in the batch. |
| hipblasStatus_t hipblasDtrmmBatchedOutofplace | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double *const | AP[], | ||
| int | lda, | ||
| const double *const | BP[], | ||
| int | ldb, | ||
| double *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
| hipblasStatus_t hipblasDtrmmOutofplace | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double * | AP, | ||
| int | lda, | ||
| const double * | BP, | ||
| int | ldb, | ||
| double * | CP, | ||
| int | ldc | ||
| ) |
| hipblasStatus_t hipblasDtrmmStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| double * | BP, | ||
| int | ldb, | ||
| hipblasStride | strideB, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
The hipBLAS trmm_strided_batched API is from Legacy BLAS and it supports only in-place functionality. It is deprecated and it will be replaced with an API that supports both in-place and out-of-place functionality. The new API is available in hipBLAS versions 1.x.x and later. To get the new API compile with the directive -DHIPBLAS_V1.
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasStrmmStridedBatched(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const float* alpha,
const float* AP,
int lda,
hipblasStride strideA,
float* BP,
int ldb,
hipblasStride strideB,
int batchCount);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasStrmmStridedBatchedOutofplace(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const float* alpha,
const float* AP,
int lda,
hipblasStride strideA,
const float* BP,
int ldb,
hipblasStride strideB,
float* CP,
int ldc,
hipblasStride strideC,
int batchCount);
#endif
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasDtrmmStridedBatched(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const double* alpha,
const double* AP,
int lda,
hipblasStride strideA,
double* BP,
int ldb,
hipblasStride strideB,
int batchCount);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasDtrmmStridedBatchedOutofplace(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const double* alpha,
const double* AP,
int lda,
hipblasStride strideA,
const double* BP,
int ldb,
hipblasStride strideB,
double* CP,
int ldc,
hipblasStride strideC,
int batchCount);
#endif
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasCtrmmStridedBatched(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasComplex* alpha,
const hipblasComplex* AP,
int lda,
hipblasStride strideA,
hipblasComplex* BP,
int ldb,
hipblasStride strideB,
int batchCount);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasCtrmmStridedBatchedOutofplace(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasComplex* alpha,
const hipblasComplex* AP,
int lda,
hipblasStride strideA,
const hipblasComplex* BP,
int ldb,
hipblasStride strideB,
hipblasComplex* CP,
int ldc,
hipblasStride strideC,
int batchCount);
#endif
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasZtrmmStridedBatched(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasDoubleComplex* alpha,
const hipblasDoubleComplex* AP,
int lda,
hipblasStride strideA,
hipblasDoubleComplex* BP,
int ldb,
hipblasStride strideB,
int batchCount);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasZtrmmStridedBatchedOutofplace(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasDoubleComplex* alpha,
const hipblasDoubleComplex* AP,
int lda,
hipblasStride strideA,
const hipblasDoubleComplex* BP,
int ldb,
hipblasStride strideB,
hipblasDoubleComplex* BC,
int ldc,
hipblasStride strideC,
int batchCount);
#endif
The deprecated Legacy BLAS in-place trmm_strided_batched performs one of the strided_batched matrix-matrix operations:
B_i := alpha*op( A_i )*B_i, or B_i := alpha*B_i*op( A_i ) for i = 0, 1, ... batch_count -1,
The new trmm_batched performs one of the matrix-matrix operations:
C_i := alpha*op( A_i )*B_i, or C_i := alpha*B_i*op( A_i ) for i = 0, 1, ... batch_count -1,
The in-place functionality is still available in the new trmmm_batched by setting pointer C equal to pointer B, setting ldc equal to ldb, and setting stride_C equal to stride_B.
alpha is a scalar, B_i is an m by n matrix, C_i is an m by n matrix, A_i is a unit, or non-unit, upper or lower triangular matrix and op( A_i ) is one of op( A_i ) = A_i or op( A_i ) = A_i^T or op( A_i ) = A_i^H.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] Specifies whether op(A_i) multiplies B_i from the left or right as follows: HIPBLAS_SIDE_LEFT: B_i := alpha*op( A_i )*B_i. HIPBLAS_SIDE_RIGHT: B_i := alpha*B_i*op( A_i ). |
| [in] | uplo | [hipblasFillMode_t] Specifies whether the matrix A is an upper or lower triangular matrix as follows: HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] Specifies the form of op(A_i) to be used in the matrix multiplication as follows: HIPBLAS_OP_N: op(A_i) = A_i. HIPBLAS_OP_T: op(A_i) = A_i^T. HIPBLAS_OP_C: op(A_i) = A_i^H. |
| [in] | diag | [hipblasDiagType_t] Specifies whether or not A_i is unit triangular as follows: HIPBLAS_DIAG_UNIT: A_i is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of B_i. m >= 0. |
| [in] | n | [int] n specifies the number of columns of B_i. n >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A_i is not referenced and B_i need not be set before entry. |
| [in] | AP | Device pointer to the first matrix A_0 on the GPU. Each A_i is of dimension ( lda, k ), where k is m when side == HIPBLAS_SIDE_LEFT and is n when side == HIPBLAS_SIDE_RIGHT. |
When uplo == HIPBLAS_FILL_MODE_UPPER the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced.
When uplo == HIPBLAS_FILL_MODE_LOWER the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced.
Note that when diag == HIPBLAS_DIAG_UNIT the diagonal elements of A_i are not referenced either, but are assumed to be unity.
| [in] | lda | [int] lda specifies the first dimension of A. if side == HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), if side == HIPBLAS_SIDE_RIGHT, lda >= max( 1, n ). |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in,out] | BP | Device pointer to the first matrix B_0 on the GPU. On entry, the leading m by n part of the array B_i must contain the matrix B_i, and on exit is overwritten by the transformed matrix. |
| [in] | ldb | [int] ldb specifies the first dimension of B_i. ldb >= max( 1, m ). |
| [in] | strideB | [hipblasStride] stride from the start of one matrix (B_i) and the next one (B_i+1) |
| [in] | batchCount | [int] number of instances i in the batch. |
| hipblasStatus_t hipblasDtrmmStridedBatchedOutofplace | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const double * | BP, | ||
| int | ldb, | ||
| hipblasStride | strideB, | ||
| double * | CP, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount | ||
| ) |
| hipblasStatus_t hipblasDtrmv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const double * | AP, | ||
| int | lda, | ||
| double * | x, | ||
| int | incx | ||
| ) |
BLAS Level 2 API.
trmv performs one of the matrix-vector operations
x = A*x or x = A**T*x,
where x is an n element vector and A is an n by n unit, or non-unit, upper or lower triangular matrix.
The vector x is overwritten.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of A. m >= 0. |
| [in] | AP | device pointer storing matrix A, of dimension ( lda, m ) |
| [in] | lda | [int] specifies the leading dimension of A. lda = max( 1, m ). |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| hipblasStatus_t hipblasDtrmvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const double *const | AP[], | ||
| int | lda, | ||
| double *const | x[], | ||
| int | incx, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
trmvBatched performs one of the matrix-vector operations
x_i = A_i*x_i or x_i = A**T*x_i, 0 \le i < batchCount
where x_i is an n element vector and A_i is an n by n (unit, or non-unit, upper or lower triangular matrix)
The vectors x_i are overwritten.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A_i is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of matrices A_i. m >= 0. |
| [in] | AP | device pointer storing pointer of matrices A_i, of dimension ( lda, m ) |
| [in] | lda | [int] specifies the leading dimension of A_i. lda >= max( 1, m ). |
| [in] | x | device pointer storing vectors x_i. |
| [in] | incx | [int] specifies the increment for the elements of vectors x_i. |
| [in] | batchCount | [int] The number of batched matrices/vectors. |
| hipblasStatus_t hipblasDtrmvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const double * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| double * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
trmvStridedBatched performs one of the matrix-vector operations
x_i = A_i*x_i or x_i = A**T*x_i, 0 \le i < batchCount
where x_i is an n element vector and A_i is an n by n (unit, or non-unit, upper or lower triangular matrix) with strides specifying how to retrieve $x_i$ (resp. $A_i$) from $x_{i-1}$ (resp. $A_i$).
The vectors x_i are overwritten.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A_i is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of matrices A_i. m >= 0. |
| [in] | AP | device pointer of the matrix A_0, of dimension ( lda, m ) |
| [in] | lda | [int] specifies the leading dimension of A_i. lda >= max( 1, m ). |
| [in] | strideA | [hipblasStride] stride from the start of one A_i matrix to the next A_{i + 1} |
| [in] | x | device pointer storing the vector x_0. |
| [in] | incx | [int] specifies the increment for the elements of one vector x. |
| [in] | stridex | [hipblasStride] stride from the start of one x_i vector to the next x_{i + 1} |
| [in] | batchCount | [int] The number of batched matrices/vectors. |
| hipblasStatus_t hipblasDtrsm | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double * | AP, | ||
| int | lda, | ||
| double * | BP, | ||
| int | ldb | ||
| ) |
BLAS Level 3 API.
trsm solves
op(A)*X = alpha*B or X*op(A) = alpha*B,
where alpha is a scalar, X and B are m by n matrices, A is triangular matrix and op(A) is one of
op( A ) = A or op( A ) = A^T or op( A ) = A^H.
The matrix X is overwritten on B.
Note about memory allocation: When trsm is launched with a k evenly divisible by the internal block size of 128, and is no larger than 10 of these blocks, the API takes advantage of utilizing pre-allocated memory found in the handle to increase overall performance. This memory can be managed by using the environment variable WORKBUF_TRSM_B_CHNK. When this variable is not set the device memory used for temporary storage will default to 1 MB and may result in chunking, which in turn may reduce performance. Under these circumstances it is recommended that WORKBUF_TRSM_B_CHNK be set to the desired chunk of right hand sides to be used at a time.
(where k is m when HIPBLAS_SIDE_LEFT and is n when HIPBLAS_SIDE_RIGHT)
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] HIPBLAS_SIDE_LEFT: op(A)*X = alpha*B. HIPBLAS_SIDE_RIGHT: X*op(A) = alpha*B. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: op(A) = A. HIPBLAS_OP_T: op(A) = A^T. HIPBLAS_OP_C: op(A) = A^H. |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of B. m >= 0. |
| [in] | n | [int] n specifies the number of columns of B. n >= 0. |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. When alpha is &zero then A is not referenced and B need not be set before entry. |
| [in] | AP | device pointer storing matrix A. of dimension ( lda, k ), where k is m when HIPBLAS_SIDE_LEFT and is n when HIPBLAS_SIDE_RIGHT only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of A. if side = HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), if side = HIPBLAS_SIDE_RIGHT, lda >= max( 1, n ). |
| [in,out] | BP | device pointer storing matrix B. |
| [in] | ldb | [int] ldb specifies the first dimension of B. ldb >= max( 1, m ). |
| hipblasStatus_t hipblasDtrsmBatched | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double *const | AP[], | ||
| int | lda, | ||
| double *const | BP[], | ||
| int | ldb, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
trsmBatched performs the following batched operation:
op(A_i)*X_i = alpha*B_i or X_i*op(A_i) = alpha*B_i, for i = 1, ..., batchCount.
where alpha is a scalar, X and B are batched m by n matrices, A is triangular batched matrix and op(A) is one of
op( A ) = A or op( A ) = A^T or op( A ) = A^H.
Each matrix X_i is overwritten on B_i for i = 1, ..., batchCount.
Note about memory allocation: When trsm is launched with a k evenly divisible by the internal block size of 128, and is no larger than 10 of these blocks, the API takes advantage of utilizing pre-allocated memory found in the handle to increase overall performance. This memory can be managed by using the environment variable WORKBUF_TRSM_B_CHNK. When this variable is not set the device memory used for temporary storage will default to 1 MB and may result in chunking, which in turn may reduce performance. Under these circumstances it is recommended that WORKBUF_TRSM_B_CHNK be set to the desired chunk of right hand sides to be used at a time. (where k is m when HIPBLAS_SIDE_LEFT and is n when HIPBLAS_SIDE_RIGHT)
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] HIPBLAS_SIDE_LEFT: op(A)*X = alpha*B. HIPBLAS_SIDE_RIGHT: X*op(A) = alpha*B. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: each A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: each A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: op(A) = A. HIPBLAS_OP_T: op(A) = A^T. HIPBLAS_OP_C: op(A) = A^H. |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: each A_i is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: each A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of each B_i. m >= 0. |
| [in] | n | [int] n specifies the number of columns of each B_i. n >= 0. |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. When alpha is &zero then A is not referenced and B need not be set before entry. |
| [in] | AP | device array of device pointers storing each matrix A_i on the GPU. Matricies are of dimension ( lda, k ), where k is m when HIPBLAS_SIDE_LEFT and is n when HIPBLAS_SIDE_RIGHT only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of each A_i. if side = HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), if side = HIPBLAS_SIDE_RIGHT, lda >= max( 1, n ). |
| [in,out] | BP | device array of device pointers storing each matrix B_i on the GPU. |
| [in] | ldb | [int] ldb specifies the first dimension of each B_i. ldb >= max( 1, m ). |
| [in] | batchCount | [int] number of trsm operatons in the batch. |
| hipblasStatus_t hipblasDtrsmStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const double * | alpha, | ||
| const double * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| double * | BP, | ||
| int | ldb, | ||
| hipblasStride | strideB, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
trsmSridedBatched performs the following strided batched operation:
op(A_i)*X_i = alpha*B_i or X_i*op(A_i) = alpha*B_i, for i = 1, ..., batchCount.
where alpha is a scalar, X and B are strided batched m by n matrices, A is triangular strided batched matrix and op(A) is one of
op( A ) = A or op( A ) = A^T or op( A ) = A^H.
Each matrix X_i is overwritten on B_i for i = 1, ..., batchCount.
Note about memory allocation: When trsm is launched with a k evenly divisible by the internal block size of 128, and is no larger than 10 of these blocks, the API takes advantage of utilizing pre-allocated memory found in the handle to increase overall performance. This memory can be managed by using the environment variable WORKBUF_TRSM_B_CHNK. When this variable is not set the device memory used for temporary storage will default to 1 MB and may result in chunking, which in turn may reduce performance. Under these circumstances it is recommended that WORKBUF_TRSM_B_CHNK be set to the desired chunk of right hand sides to be used at a time. (where k is m when HIPBLAS_SIDE_LEFT and is n when HIPBLAS_SIDE_RIGHT)
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] HIPBLAS_SIDE_LEFT: op(A)*X = alpha*B. HIPBLAS_SIDE_RIGHT: X*op(A) = alpha*B. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: each A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: each A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: op(A) = A. HIPBLAS_OP_T: op(A) = A^T. HIPBLAS_OP_C: op(A) = A^H. |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: each A_i is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: each A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of each B_i. m >= 0. |
| [in] | n | [int] n specifies the number of columns of each B_i. n >= 0. |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. When alpha is &zero then A is not referenced and B need not be set before entry. |
| [in] | AP | device pointer pointing to the first matrix A_1. of dimension ( lda, k ), where k is m when HIPBLAS_SIDE_LEFT and is n when HIPBLAS_SIDE_RIGHT only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of each A_i. if side = HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), if side = HIPBLAS_SIDE_RIGHT, lda >= max( 1, n ). |
| [in] | strideA | [hipblasStride] stride from the start of one A_i matrix to the next A_(i + 1). |
| [in,out] | BP | device pointer pointing to the first matrix B_1. |
| [in] | ldb | [int] ldb specifies the first dimension of each B_i. ldb >= max( 1, m ). |
| [in] | strideB | [hipblasStride] stride from the start of one B_i matrix to the next B_(i + 1). |
| [in] | batchCount | [int] number of trsm operatons in the batch. |
| hipblasStatus_t hipblasDtrsv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const double * | AP, | ||
| int | lda, | ||
| double * | x, | ||
| int | incx | ||
| ) |
BLAS Level 2 API.
trsv solves
A*x = b or A**T*x = b,
where x and b are vectors and A is a triangular matrix.
The vector x is overwritten on b.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of b. m >= 0. |
| [in] | AP | device pointer storing matrix A, of dimension ( lda, m ) |
| [in] | lda | [int] specifies the leading dimension of A. lda = max( 1, m ). |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| hipblasStatus_t hipblasDtrsvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const double *const | AP[], | ||
| int | lda, | ||
| double *const | x[], | ||
| int | incx, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
trsvBatched solves
A_i*x_i = b_i or A_i**T*x_i = b_i,
where (A_i, x_i, b_i) is the i-th instance of the batch. x_i and b_i are vectors and A_i is an m by m triangular matrix.
The vector x is overwritten on b.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of b. m >= 0. |
| [in] | AP | device array of device pointers storing each matrix A_i. |
| [in] | lda | [int] specifies the leading dimension of each A_i. lda = max(1, m) |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasDtrsvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const double * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| double * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
trsvStridedBatched solves
A_i*x_i = b_i or A_i**T*x_i = b_i,
where (A_i, x_i, b_i) is the i-th instance of the batch. x_i and b_i are vectors and A_i is an m by m triangular matrix, for i = 1, ..., batchCount.
The vector x is overwritten on b.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of each b_i. m >= 0. |
| [in] | AP | device pointer to the first matrix (A_1) in the batch, of dimension ( lda, m ) |
| [in] | strideA | [hipblasStride] stride from the start of one A_i matrix to the next A_(i + 1) |
| [in] | lda | [int] specifies the leading dimension of each A_i. lda = max( 1, m ). |
| [in,out] | x | device pointer to the first vector (x_1) in the batch. |
| [in] | stridex | [hipblasStride] stride from the start of one x_i vector to the next x_(i + 1) |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasDtrtri | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasDiagType_t | diag, | ||
| int | n, | ||
| const double * | AP, | ||
| int | lda, | ||
| double * | invA, | ||
| int | ldinvA | ||
| ) |
BLAS Level 3 API.
trtri compute the inverse of a matrix A, namely, invA
and write the result into invA;
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | diag | [hipblasDiagType_t] = 'HIPBLAS_DIAG_NON_UNIT', A is non-unit triangular; = 'HIPBLAS_DIAG_UNIT', A is unit triangular; |
| [in] | n | [int] size of matrix A and invA |
| [in] | AP | device pointer storing matrix A. |
| [in] | lda | [int] specifies the leading dimension of A. |
| [out] | invA | device pointer storing matrix invA. |
| [in] | ldinvA | [int] specifies the leading dimension of invA. |
| hipblasStatus_t hipblasDtrtriBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasDiagType_t | diag, | ||
| int | n, | ||
| const double *const | AP[], | ||
| int | lda, | ||
| double * | invA[], | ||
| int | ldinvA, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
trtriBatched compute the inverse of A_i and write into invA_i where A_i and invA_i are the i-th matrices in the batch, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' |
| [in] | diag | [hipblasDiagType_t] = 'HIPBLAS_DIAG_NON_UNIT', A is non-unit triangular; = 'HIPBLAS_DIAG_UNIT', A is unit triangular; |
| [in] | n | [int] |
| [in] | AP | device array of device pointers storing each matrix A_i. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [out] | invA | device array of device pointers storing the inverse of each matrix A_i. Partial inplace operation is supported, see below. If UPLO = 'U', the leading N-by-N upper triangular part of the invA will store the inverse of the upper triangular matrix, and the strictly lower triangular part of invA is cleared. If UPLO = 'L', the leading N-by-N lower triangular part of the invA will store the inverse of the lower triangular matrix, and the strictly upper triangular part of invA is cleared. |
| [in] | ldinvA | [int] specifies the leading dimension of each invA_i. |
| [in] | batchCount | [int] numbers of matrices in the batch |
| hipblasStatus_t hipblasDtrtriStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasDiagType_t | diag, | ||
| int | n, | ||
| const double * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| double * | invA, | ||
| int | ldinvA, | ||
| hipblasStride | stride_invA, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
trtriStridedBatched compute the inverse of A_i and write into invA_i where A_i and invA_i are the i-th matrices in the batch, for i = 1, ..., batchCount
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' |
| [in] | diag | [hipblasDiagType_t] = 'HIPBLAS_DIAG_NON_UNIT', A is non-unit triangular; = 'HIPBLAS_DIAG_UNIT', A is unit triangular; |
| [in] | n | [int] |
| [in] | AP | device pointer pointing to address of first matrix A_1. |
| [in] | lda | [int] specifies the leading dimension of each A. |
| [in] | strideA | [hipblasStride] "batch stride a": stride from the start of one A_i matrix to the next A_(i + 1). |
| [out] | invA | device pointer storing the inverses of each matrix A_i. Partial inplace operation is supported, see below. If UPLO = 'U', the leading N-by-N upper triangular part of the invA will store the inverse of the upper triangular matrix, and the strictly lower triangular part of invA is cleared. If UPLO = 'L', the leading N-by-N lower triangular part of the invA will store the inverse of the lower triangular matrix, and the strictly upper triangular part of invA is cleared. |
| [in] | ldinvA | [int] specifies the leading dimension of each invA_i. |
| [in] | stride_invA | [hipblasStride] "batch stride invA": stride from the start of one invA_i matrix to the next invA_(i + 1). |
| [in] | batchCount | [int] numbers of matrices in the batch |
| hipblasStatus_t hipblasDzasum | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| double * | result | ||
| ) |
BLAS Level 1 API.
asum computes the sum of the magnitudes of elements of a real vector x, or the sum of magnitudes of the real and imaginary parts of elements if x is a complex vector.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x and y. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. incx must be > 0. |
| [in,out] | result | device pointer or host pointer to store the asum product. return is 0.0 if n <= 0. |
| hipblasStatus_t hipblasDzasumBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasDoubleComplex *const | x[], | ||
| int | incx, | ||
| int | batchCount, | ||
| double * | result | ||
| ) |
BLAS Level 1 API.
asumBatched computes the sum of the magnitudes of the elements in a batch of real vectors x_i, or the sum of magnitudes of the real and imaginary parts of elements if x_i is a complex vector, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each vector x_i |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. incx must be > 0. |
| [in] | batchCount | [int] number of instances in the batch. |
| [out] | result | device array or host array of batchCount size for results. return is 0.0 if n, incx<=0. |
| hipblasStatus_t hipblasDzasumStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount, | ||
| double * | result | ||
| ) |
BLAS Level 1 API.
asumStridedBatched computes the sum of the magnitudes of elements of a real vectors x_i, or the sum of magnitudes of the real and imaginary parts of elements if x_i is a complex vector, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each vector x_i |
| [in] | x | device pointer to the first vector x_1. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. incx must be > 0. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stride_x, however the user should take care to ensure that stride_x is of appropriate size, for a typical case this means stride_x >= n * incx. |
| [in] | batchCount | [int] number of instances in the batch |
| [out] | result | device pointer or host pointer to array for storing contiguous batchCount results. return is 0.0 if n, incx<=0. |
| hipblasStatus_t hipblasDznrm2 | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| double * | result | ||
| ) |
BLAS Level 1 API.
nrm2 computes the euclidean norm of a real or complex vector
result := sqrt( x'*x ) for real vectors
result := sqrt( x**H*x ) for complex vectors
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of y. |
| [in,out] | result | device pointer or host pointer to store the nrm2 product. return is 0.0 if n, incx<=0. |
| hipblasStatus_t hipblasDznrm2Batched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasDoubleComplex *const | x[], | ||
| int | incx, | ||
| int | batchCount, | ||
| double * | result | ||
| ) |
BLAS Level 1 API.
nrm2Batched computes the euclidean norm over a batch of real or complex vectors
result := sqrt( x_i'*x_i ) for real vectors x, for i = 1, ..., batchCount
result := sqrt( x_i**H*x_i ) for complex vectors x, for i = 1, ..., batchCount
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each x_i. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. incx must be > 0. |
| [in] | batchCount | [int] number of instances in the batch |
| [out] | result | device pointer or host pointer to array of batchCount size for nrm2 results. return is 0.0 for each element if n <= 0, incx<=0. |
| hipblasStatus_t hipblasDznrm2StridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount, | ||
| double * | result | ||
| ) |
BLAS Level 1 API.
nrm2StridedBatched computes the euclidean norm over a batch of real or complex vectors
:= sqrt( x_i'*x_i ) for real vectors x, for i = 1, ..., batchCount
:= sqrt( x_i**H*x_i ) for complex vectors, for i = 1, ..., batchCount
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each x_i. |
| [in] | x | device pointer to the first vector x_1. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. incx must be > 0. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stride_x, however the user should take care to ensure that stride_x is of appropriate size, for a typical case this means stride_x >= n * incx. |
| [in] | batchCount | [int] number of instances in the batch |
| [out] | result | device pointer or host pointer to array for storing contiguous batchCount results. return is 0.0 for each element if n <= 0, incx<=0. |
| hipblasStatus_t hipblasGemmBatchedEx | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | transA, | ||
| hipblasOperation_t | transB, | ||
| int | m, | ||
| int | n, | ||
| int | k, | ||
| const void * | alpha, | ||
| const void * | A[], | ||
| hipblasDatatype_t | aType, | ||
| int | lda, | ||
| const void * | B[], | ||
| hipblasDatatype_t | bType, | ||
| int | ldb, | ||
| const void * | beta, | ||
| void * | C[], | ||
| hipblasDatatype_t | cType, | ||
| int | ldc, | ||
| int | batchCount, | ||
| hipblasDatatype_t | computeType, | ||
| hipblasGemmAlgo_t | algo | ||
| ) |
BLAS EX API.
gemmBatchedEx performs one of the batched matrix-matrix operations C_i = alpha*op(A_i)*op(B_i) + beta*C_i, for i = 1, ..., batchCount. where op( X ) is one of op( X ) = X or op( X ) = X**T or op( X ) = X**H, alpha and beta are scalars, and A, B, and C are batched pointers to matrices, with op( A ) an m by k by batchCount batched matrix, op( B ) a k by n by batchCount batched matrix and C a m by n by batchCount batched matrix. The batched matrices are an array of pointers to matrices. The number of pointers to matrices is batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | transA | [hipblasOperation_t] specifies the form of op( A ). |
| [in] | transB | [hipblasOperation_t] specifies the form of op( B ). |
| [in] | m | [int] matrix dimension m. |
| [in] | n | [int] matrix dimension n. |
| [in] | k | [int] matrix dimension k. |
| [in] | alpha | [const void *] device pointer or host pointer specifying the scalar alpha. Same datatype as computeType. |
| [in] | A | [void *] device pointer storing array of pointers to each matrix A_i. |
| [in] | aType | [hipblasDatatype_t] specifies the datatype of each matrix A_i. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | B | [void *] device pointer storing array of pointers to each matrix B_i. |
| [in] | bType | [hipblasDatatype_t] specifies the datatype of each matrix B_i. |
| [in] | ldb | [int] specifies the leading dimension of each B_i. |
| [in] | beta | [const void *] device pointer or host pointer specifying the scalar beta. Same datatype as computeType. |
| [in] | C | [void *] device array of device pointers to each matrix C_i. |
| [in] | cType | [hipblasDatatype_t] specifies the datatype of each matrix C_i. |
| [in] | ldc | [int] specifies the leading dimension of each C_i. |
| [in] | batchCount | [int] number of gemm operations in the batch. |
| [in] | computeType | [hipblasDatatype_t] specifies the datatype of computation. |
| [in] | algo | [hipblasGemmAlgo_t] enumerant specifying the algorithm type. |
| hipblasStatus_t hipblasGemmEx | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | transA, | ||
| hipblasOperation_t | transB, | ||
| int | m, | ||
| int | n, | ||
| int | k, | ||
| const void * | alpha, | ||
| const void * | A, | ||
| hipblasDatatype_t | aType, | ||
| int | lda, | ||
| const void * | B, | ||
| hipblasDatatype_t | bType, | ||
| int | ldb, | ||
| const void * | beta, | ||
| void * | C, | ||
| hipblasDatatype_t | cType, | ||
| int | ldc, | ||
| hipblasDatatype_t | computeType, | ||
| hipblasGemmAlgo_t | algo | ||
| ) |
BLAS EX API.
gemmEx performs one of the matrix-matrix operations
C = alpha*op( A )*op( B ) + beta*C,
where op( X ) is one of
op( X ) = X or op( X ) = X**T or op( X ) = X**H,
alpha and beta are scalars, and A, B, and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C is a m by n matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | transA | [hipblasOperation_t] specifies the form of op( A ). |
| [in] | transB | [hipblasOperation_t] specifies the form of op( B ). |
| [in] | m | [int] matrix dimension m. |
| [in] | n | [int] matrix dimension n. |
| [in] | k | [int] matrix dimension k. |
| [in] | alpha | [const void *] device pointer or host pointer specifying the scalar alpha. Same datatype as computeType. |
| [in] | A | [void *] device pointer storing matrix A. |
| [in] | aType | [hipblasDatatype_t] specifies the datatype of matrix A. |
| [in] | lda | [int] specifies the leading dimension of A. |
| [in] | B | [void *] device pointer storing matrix B. |
| [in] | bType | [hipblasDatatype_t] specifies the datatype of matrix B. |
| [in] | ldb | [int] specifies the leading dimension of B. |
| [in] | beta | [const void *] device pointer or host pointer specifying the scalar beta. Same datatype as computeType. |
| [in] | C | [void *] device pointer storing matrix C. |
| [in] | cType | [hipblasDatatype_t] specifies the datatype of matrix C. |
| [in] | ldc | [int] specifies the leading dimension of C. |
| [in] | computeType | [hipblasDatatype_t] specifies the datatype of computation. |
| [in] | algo | [hipblasGemmAlgo_t] enumerant specifying the algorithm type. |
| hipblasStatus_t hipblasGemmStridedBatchedEx | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | transA, | ||
| hipblasOperation_t | transB, | ||
| int | m, | ||
| int | n, | ||
| int | k, | ||
| const void * | alpha, | ||
| const void * | A, | ||
| hipblasDatatype_t | aType, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const void * | B, | ||
| hipblasDatatype_t | bType, | ||
| int | ldb, | ||
| hipblasStride | strideB, | ||
| const void * | beta, | ||
| void * | C, | ||
| hipblasDatatype_t | cType, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount, | ||
| hipblasDatatype_t | computeType, | ||
| hipblasGemmAlgo_t | algo | ||
| ) |
BLAS EX API.
gemmStridedBatchedEx performs one of the strided_batched matrix-matrix operations
C_i = alpha*op(A_i)*op(B_i) + beta*C_i, for i = 1, ..., batchCount
where op( X ) is one of
op( X ) = X or op( X ) = X**T or op( X ) = X**H,
alpha and beta are scalars, and A, B, and C are strided_batched matrices, with op( A ) an m by k by batchCount strided_batched matrix, op( B ) a k by n by batchCount strided_batched matrix and C a m by n by batchCount strided_batched matrix.
The strided_batched matrices are multiple matrices separated by a constant stride. The number of matrices is batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | transA | [hipblasOperation_t] specifies the form of op( A ). |
| [in] | transB | [hipblasOperation_t] specifies the form of op( B ). |
| [in] | m | [int] matrix dimension m. |
| [in] | n | [int] matrix dimension n. |
| [in] | k | [int] matrix dimension k. |
| [in] | alpha | [const void *] device pointer or host pointer specifying the scalar alpha. Same datatype as computeType. |
| [in] | A | [void *] device pointer pointing to first matrix A_1. |
| [in] | aType | [hipblasDatatype_t] specifies the datatype of each matrix A_i. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | strideA | [hipblasStride] specifies stride from start of one A_i matrix to the next A_(i + 1). |
| [in] | B | [void *] device pointer pointing to first matrix B_1. |
| [in] | bType | [hipblasDatatype_t] specifies the datatype of each matrix B_i. |
| [in] | ldb | [int] specifies the leading dimension of each B_i. |
| [in] | strideB | [hipblasStride] specifies stride from start of one B_i matrix to the next B_(i + 1). |
| [in] | beta | [const void *] device pointer or host pointer specifying the scalar beta. Same datatype as computeType. |
| [in] | C | [void *] device pointer pointing to first matrix C_1. |
| [in] | cType | [hipblasDatatype_t] specifies the datatype of each matrix C_i. |
| [in] | ldc | [int] specifies the leading dimension of each C_i. |
| [in] | strideC | [hipblasStride] specifies stride from start of one C_i matrix to the next C_(i + 1). |
| [in] | batchCount | [int] number of gemm operations in the batch. |
| [in] | computeType | [hipblasDatatype_t] specifies the datatype of computation. |
| [in] | algo | [hipblasGemmAlgo_t] enumerant specifying the algorithm type. |
| hipblasStatus_t hipblasGetAtomicsMode | ( | hipblasHandle_t | handle, |
| hipblasAtomicsMode_t * | atomics_mode | ||
| ) |
Get hipblasSetAtomicsMode.
| hipblasStatus_t hipblasGetMatrix | ( | int | rows, |
| int | cols, | ||
| int | elemSize, | ||
| const void * | AP, | ||
| int | lda, | ||
| void * | BP, | ||
| int | ldb | ||
| ) |
copy matrix from device to host
| [in] | rows | [int] number of rows in matrices |
| [in] | cols | [int] number of columns in matrices |
| [in] | elemSize | [int] number of bytes per element in the matrix |
| [in] | AP | pointer to matrix on the GPU |
| [in] | lda | [int] specifies the leading dimension of A, lda >= rows |
| [out] | BP | pointer to matrix on the host |
| [in] | ldb | [int] specifies the leading dimension of B, ldb >= rows |
| hipblasStatus_t hipblasGetMatrixAsync | ( | int | rows, |
| int | cols, | ||
| int | elemSize, | ||
| const void * | AP, | ||
| int | lda, | ||
| void * | BP, | ||
| int | ldb, | ||
| hipStream_t | stream | ||
| ) |
asynchronously copy matrix from device to host
hipblasGetMatrixAsync copies a matrix from device memory to pinned host memory asynchronously. Memory on the host must be allocated with hipHostMalloc or the transfer will be synchronous.
| [in] | rows | [int] number of rows in matrices |
| [in] | cols | [int] number of columns in matrices |
| [in] | elemSize | [int] number of bytes per element in the matrix |
| [in] | AP | pointer to matrix on the GPU |
| [in] | lda | [int] specifies the leading dimension of A, lda >= rows |
| [out] | BP | pointer to matrix on the host |
| [in] | ldb | [int] specifies the leading dimension of B, ldb >= rows |
| [in] | stream | specifies the stream into which this transfer request is queued |
| hipblasStatus_t hipblasGetPointerMode | ( | hipblasHandle_t | handle, |
| hipblasPointerMode_t * | mode | ||
| ) |
Get hipblas pointer mode.
| hipblasStatus_t hipblasGetStream | ( | hipblasHandle_t | handle, |
| hipStream_t * | streamId | ||
| ) |
Get stream[0] for handle.
| hipblasStatus_t hipblasGetVector | ( | int | n, |
| int | elemSize, | ||
| const void * | x, | ||
| int | incx, | ||
| void * | y, | ||
| int | incy | ||
| ) |
copy vector from device to host
| [in] | n | [int] number of elements in the vector |
| [in] | elemSize | [int] Size of both vectors in bytes |
| [in] | x | pointer to vector on the device |
| [in] | incx | [int] specifies the increment for the elements of the vector |
| [out] | y | pointer to vector on the host |
| [in] | incy | [int] specifies the increment for the elements of the vector |
| hipblasStatus_t hipblasGetVectorAsync | ( | int | n, |
| int | elemSize, | ||
| const void * | x, | ||
| int | incx, | ||
| void * | y, | ||
| int | incy, | ||
| hipStream_t | stream | ||
| ) |
asynchronously copy vector from device to host
hipblasGetVectorAsync copies a vector from pinned host memory to device memory asynchronously. Memory on the host must be allocated with hipHostMalloc or the transfer will be synchronous.
| [in] | n | [int] number of elements in the vector |
| [in] | elemSize | [int] number of bytes per element in the matrix |
| [in] | x | pointer to vector on the device |
| [in] | incx | [int] specifies the increment for the elements of the vector |
| [out] | y | pointer to vector on the host |
| [in] | incy | [int] specifies the increment for the elements of the vector |
| [in] | stream | specifies the stream into which this transfer request is queued |
| hipblasStatus_t hipblasHaxpy | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasHalf * | alpha, | ||
| const hipblasHalf * | x, | ||
| int | incx, | ||
| hipblasHalf * | y, | ||
| int | incy | ||
| ) |
BLAS Level 1 API.
axpy computes constant alpha multiplied by vector x, plus vector y
y := alpha * x + y
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x and y. |
| [in] | alpha | device pointer or host pointer to specify the scalar alpha. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [out] | y | device pointer storing vector y. |
| [in,out] | incy | [int] specifies the increment for the elements of y. |
| hipblasStatus_t hipblasHaxpyBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasHalf * | alpha, | ||
| const hipblasHalf *const | x[], | ||
| int | incx, | ||
| hipblasHalf *const | y[], | ||
| int | incy, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
axpyBatched compute y := alpha * x + y over a set of batched vectors.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x and y. |
| [in] | alpha | specifies the scalar alpha. |
| [in] | x | pointer storing vector x on the GPU. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [out] | y | pointer storing vector y on the GPU. |
| [in,out] | incy | [int] specifies the increment for the elements of y. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasHaxpyStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasHalf * | alpha, | ||
| const hipblasHalf * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| hipblasHalf * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
axpyStridedBatched compute y := alpha * x + y over a set of strided batched vectors.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] |
| [in] | alpha | specifies the scalar alpha. |
| [in] | x | pointer storing vector x on the GPU. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | stridex | [hipblasStride] specifies the increment between vectors of x. |
| [out] | y | pointer storing vector y on the GPU. |
| [in,out] | incy | [int] specifies the increment for the elements of y. |
| [in] | stridey | [hipblasStride] specifies the increment between vectors of y. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasHdot | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasHalf * | x, | ||
| int | incx, | ||
| const hipblasHalf * | y, | ||
| int | incy, | ||
| hipblasHalf * | result | ||
| ) |
BLAS Level 1 API.
dot(u) performs the dot product of vectors x and y
result = x * y;
dotc performs the dot product of the conjugate of complex vector x and complex vector y
result = conjugate (x) * y;
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x and y. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of y. |
| [in] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in,out] | result | device pointer or host pointer to store the dot product. return is 0.0 if n <= 0. |
| hipblasStatus_t hipblasHdotBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasHalf *const | x[], | ||
| int | incx, | ||
| const hipblasHalf *const | y[], | ||
| int | incy, | ||
| int | batchCount, | ||
| hipblasHalf * | result | ||
| ) |
BLAS Level 1 API.
dotBatched(u) performs a batch of dot products of vectors x and y
result_i = x_i * y_i;
dotcBatched performs a batch of dot products of the conjugate of complex vector x and complex vector y
result_i = conjugate (x_i) * y_i;
where (x_i, y_i) is the i-th instance of the batch. x_i and y_i are vectors, for i = 1, ..., batchCount
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i and y_i. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | batchCount | [int] number of instances in the batch |
| [in,out] | result | device array or host array of batchCount size to store the dot products of each batch. return 0.0 for each element if n <= 0. |
| hipblasStatus_t hipblasHdotStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasHalf * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const hipblasHalf * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount, | ||
| hipblasHalf * | result | ||
| ) |
BLAS Level 1 API.
dotStridedBatched(u) performs a batch of dot products of vectors x and y
result_i = x_i * y_i;
dotcStridedBatched performs a batch of dot products of the conjugate of complex vector x and complex vector y
result_i = conjugate (x_i) * y_i;
where (x_i, y_i) is the i-th instance of the batch. x_i and y_i are vectors, for i = 1, ..., batchCount
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i and y_i. |
| [in] | x | device pointer to the first vector (x_1) in the batch. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1) |
| [in] | y | device pointer to the first vector (y_1) in the batch. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1) |
| [in] | batchCount | [int] number of instances in the batch |
| [in,out] | result | device array or host array of batchCount size to store the dot products of each batch. return 0.0 for each element if n <= 0. |
| hipblasStatus_t hipblasHgemm | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | transA, | ||
| hipblasOperation_t | transB, | ||
| int | m, | ||
| int | n, | ||
| int | k, | ||
| const hipblasHalf * | alpha, | ||
| const hipblasHalf * | AP, | ||
| int | lda, | ||
| const hipblasHalf * | BP, | ||
| int | ldb, | ||
| const hipblasHalf * | beta, | ||
| hipblasHalf * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
gemm performs one of the matrix-matrix operations
C = alpha*op( A )*op( B ) + beta*C,
where op( X ) is one of
op( X ) = X or op( X ) = X**T or op( X ) = X**H,
alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
| [in] | handle | [hipblasHandle_t] . |
| [in] | transA | [hipblasOperation_t] specifies the form of op( A ) |
| [in] | transB | [hipblasOperation_t] specifies the form of op( B ) |
| [in] | m | [int] number or rows of matrices op( A ) and C |
| [in] | n | [int] number of columns of matrices op( B ) and C |
| [in] | k | [int] number of columns of matrix op( A ) and number of rows of matrix op( B ) |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. |
| [in] | AP | device pointer storing matrix A. |
| [in] | lda | [int] specifies the leading dimension of A. |
| [in] | BP | device pointer storing matrix B. |
| [in] | ldb | [int] specifies the leading dimension of B. |
| [in] | beta | device pointer or host pointer specifying the scalar beta. |
| [in,out] | CP | device pointer storing matrix C on the GPU. |
| [in] | ldc | [int] specifies the leading dimension of C. |
| hipblasStatus_t hipblasHgemmBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | transA, | ||
| hipblasOperation_t | transB, | ||
| int | m, | ||
| int | n, | ||
| int | k, | ||
| const hipblasHalf * | alpha, | ||
| const hipblasHalf *const | AP[], | ||
| int | lda, | ||
| const hipblasHalf *const | BP[], | ||
| int | ldb, | ||
| const hipblasHalf * | beta, | ||
| hipblasHalf *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
gemmBatched performs one of the batched matrix-matrix operations C_i = alpha*op( A_i )*op( B_i ) + beta*C_i, for i = 1, ..., batchCount. where op( X ) is one of op( X ) = X or op( X ) = X**T or op( X ) = X**H, alpha and beta are scalars, and A, B and C are strided batched matrices, with op( A ) an m by k by batchCount strided_batched matrix, op( B ) an k by n by batchCount strided_batched matrix and C an m by n by batchCount strided_batched matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | transA | [hipblasOperation_t] specifies the form of op( A ) |
| [in] | transB | [hipblasOperation_t] specifies the form of op( B ) |
| [in] | m | [int] matrix dimention m. |
| [in] | n | [int] matrix dimention n. |
| [in] | k | [int] matrix dimention k. |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. |
| [in] | AP | device array of device pointers storing each matrix A_i. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | BP | device array of device pointers storing each matrix B_i. |
| [in] | ldb | [int] specifies the leading dimension of each B_i. |
| [in] | beta | device pointer or host pointer specifying the scalar beta. |
| [in,out] | CP | device array of device pointers storing each matrix C_i. |
| [in] | ldc | [int] specifies the leading dimension of each C_i. |
| [in] | batchCount | [int] number of gemm operations in the batch |
| hipblasStatus_t hipblasHgemmStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | transA, | ||
| hipblasOperation_t | transB, | ||
| int | m, | ||
| int | n, | ||
| int | k, | ||
| const hipblasHalf * | alpha, | ||
| const hipblasHalf * | AP, | ||
| int | lda, | ||
| long long | strideA, | ||
| const hipblasHalf * | BP, | ||
| int | ldb, | ||
| long long | strideB, | ||
| const hipblasHalf * | beta, | ||
| hipblasHalf * | CP, | ||
| int | ldc, | ||
| long long | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
gemmStridedBatched performs one of the strided batched matrix-matrix operations
C_i = alpha*op( A_i )*op( B_i ) + beta*C_i, for i = 1, ..., batchCount.
where op( X ) is one of
op( X ) = X or op( X ) = X**T or op( X ) = X**H,
alpha and beta are scalars, and A, B and C are strided batched matrices, with op( A ) an m by k by batchCount strided_batched matrix, op( B ) an k by n by batchCount strided_batched matrix and C an m by n by batchCount strided_batched matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | transA | [hipblasOperation_t] specifies the form of op( A ) |
| [in] | transB | [hipblasOperation_t] specifies the form of op( B ) |
| [in] | m | [int] matrix dimention m. |
| [in] | n | [int] matrix dimention n. |
| [in] | k | [int] matrix dimention k. |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. |
| [in] | AP | device pointer pointing to the first matrix A_1. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | strideA | [hipblasStride] stride from the start of one A_i matrix to the next A_(i + 1). |
| [in] | BP | device pointer pointing to the first matrix B_1. |
| [in] | ldb | [int] specifies the leading dimension of each B_i. |
| [in] | strideB | [hipblasStride] stride from the start of one B_i matrix to the next B_(i + 1). |
| [in] | beta | device pointer or host pointer specifying the scalar beta. |
| [in,out] | CP | device pointer pointing to the first matrix C_1. |
| [in] | ldc | [int] specifies the leading dimension of each C_i. |
| [in] | strideC | [hipblasStride] stride from the start of one C_i matrix to the next C_(i + 1). |
| [in] | batchCount | [int] number of gemm operatons in the batch |
| hipblasStatus_t hipblasIcamax | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| int * | result | ||
| ) |
BLAS Level 1 API.
amax finds the first index of the element of maximum magnitude of a vector x.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of y. |
| [in,out] | result | device pointer or host pointer to store the amax index. return is 0.0 if n, incx<=0. |
| hipblasStatus_t hipblasIcamaxBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasComplex *const | x[], | ||
| int | incx, | ||
| int | batchCount, | ||
| int * | result | ||
| ) |
BLAS Level 1 API.
amaxBatched finds the first index of the element of maximum magnitude of each vector x_i in a batch, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each vector x_i |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. incx must be > 0. |
| [in] | batchCount | [int] number of instances in the batch, must be > 0. |
| [out] | result | device or host array of pointers of batchCount size for results. return is 0 if n, incx<=0. |
| hipblasStatus_t hipblasIcamaxStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount, | ||
| int * | result | ||
| ) |
BLAS Level 1 API.
amaxStridedBatched finds the first index of the element of maximum magnitude of each vector x_i in a batch, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each vector x_i |
| [in] | x | device pointer to the first vector x_1. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. incx must be > 0. |
| [in] | stridex | [hipblasStride] specifies the pointer increment between one x_i and the next x_(i + 1). |
| [in] | batchCount | [int] number of instances in the batch |
| [out] | result | device or host pointer for storing contiguous batchCount results. return is 0 if n <= 0, incx<=0. |
| hipblasStatus_t hipblasIcamin | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| int * | result | ||
| ) |
BLAS Level 1 API.
amin finds the first index of the element of minimum magnitude of a vector x.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of y. |
| [in,out] | result | device pointer or host pointer to store the amin index. return is 0.0 if n, incx<=0. |
| hipblasStatus_t hipblasIcaminBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasComplex *const | x[], | ||
| int | incx, | ||
| int | batchCount, | ||
| int * | result | ||
| ) |
BLAS Level 1 API.
aminBatched finds the first index of the element of minimum magnitude of each vector x_i in a batch, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each vector x_i |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. incx must be > 0. |
| [in] | batchCount | [int] number of instances in the batch, must be > 0. |
| [out] | result | device or host pointers to array of batchCount size for results. return is 0 if n, incx<=0. |
| hipblasStatus_t hipblasIcaminStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount, | ||
| int * | result | ||
| ) |
BLAS Level 1 API.
aminStridedBatched finds the first index of the element of minimum magnitude of each vector x_i in a batch, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each vector x_i |
| [in] | x | device pointer to the first vector x_1. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. incx must be > 0. |
| [in] | stridex | [hipblasStride] specifies the pointer increment between one x_i and the next x_(i + 1) |
| [in] | batchCount | [int] number of instances in the batch |
| [out] | result | device or host pointer to array for storing contiguous batchCount results. return is 0 if n <= 0, incx<=0. |
| hipblasStatus_t hipblasIdamax | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const double * | x, | ||
| int | incx, | ||
| int * | result | ||
| ) |
BLAS Level 1 API.
amax finds the first index of the element of maximum magnitude of a vector x.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of y. |
| [in,out] | result | device pointer or host pointer to store the amax index. return is 0.0 if n, incx<=0. |
| hipblasStatus_t hipblasIdamaxBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const double *const | x[], | ||
| int | incx, | ||
| int | batchCount, | ||
| int * | result | ||
| ) |
BLAS Level 1 API.
amaxBatched finds the first index of the element of maximum magnitude of each vector x_i in a batch, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each vector x_i |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. incx must be > 0. |
| [in] | batchCount | [int] number of instances in the batch, must be > 0. |
| [out] | result | device or host array of pointers of batchCount size for results. return is 0 if n, incx<=0. |
| hipblasStatus_t hipblasIdamaxStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const double * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount, | ||
| int * | result | ||
| ) |
BLAS Level 1 API.
amaxStridedBatched finds the first index of the element of maximum magnitude of each vector x_i in a batch, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each vector x_i |
| [in] | x | device pointer to the first vector x_1. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. incx must be > 0. |
| [in] | stridex | [hipblasStride] specifies the pointer increment between one x_i and the next x_(i + 1). |
| [in] | batchCount | [int] number of instances in the batch |
| [out] | result | device or host pointer for storing contiguous batchCount results. return is 0 if n <= 0, incx<=0. |
| hipblasStatus_t hipblasIdamin | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const double * | x, | ||
| int | incx, | ||
| int * | result | ||
| ) |
BLAS Level 1 API.
amin finds the first index of the element of minimum magnitude of a vector x.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of y. |
| [in,out] | result | device pointer or host pointer to store the amin index. return is 0.0 if n, incx<=0. |
| hipblasStatus_t hipblasIdaminBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const double *const | x[], | ||
| int | incx, | ||
| int | batchCount, | ||
| int * | result | ||
| ) |
BLAS Level 1 API.
aminBatched finds the first index of the element of minimum magnitude of each vector x_i in a batch, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each vector x_i |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. incx must be > 0. |
| [in] | batchCount | [int] number of instances in the batch, must be > 0. |
| [out] | result | device or host pointers to array of batchCount size for results. return is 0 if n, incx<=0. |
| hipblasStatus_t hipblasIdaminStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const double * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount, | ||
| int * | result | ||
| ) |
BLAS Level 1 API.
aminStridedBatched finds the first index of the element of minimum magnitude of each vector x_i in a batch, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each vector x_i |
| [in] | x | device pointer to the first vector x_1. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. incx must be > 0. |
| [in] | stridex | [hipblasStride] specifies the pointer increment between one x_i and the next x_(i + 1) |
| [in] | batchCount | [int] number of instances in the batch |
| [out] | result | device or host pointer to array for storing contiguous batchCount results. return is 0 if n <= 0, incx<=0. |
| hipblasStatus_t hipblasIsamax | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const float * | x, | ||
| int | incx, | ||
| int * | result | ||
| ) |
BLAS Level 1 API.
amax finds the first index of the element of maximum magnitude of a vector x.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of y. |
| [in,out] | result | device pointer or host pointer to store the amax index. return is 0.0 if n, incx<=0. |
| hipblasStatus_t hipblasIsamaxBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const float *const | x[], | ||
| int | incx, | ||
| int | batchCount, | ||
| int * | result | ||
| ) |
BLAS Level 1 API.
amaxBatched finds the first index of the element of maximum magnitude of each vector x_i in a batch, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each vector x_i |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. incx must be > 0. |
| [in] | batchCount | [int] number of instances in the batch, must be > 0. |
| [out] | result | device or host array of pointers of batchCount size for results. return is 0 if n, incx<=0. |
| hipblasStatus_t hipblasIsamaxStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const float * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount, | ||
| int * | result | ||
| ) |
BLAS Level 1 API.
amaxStridedBatched finds the first index of the element of maximum magnitude of each vector x_i in a batch, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each vector x_i |
| [in] | x | device pointer to the first vector x_1. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. incx must be > 0. |
| [in] | stridex | [hipblasStride] specifies the pointer increment between one x_i and the next x_(i + 1). |
| [in] | batchCount | [int] number of instances in the batch |
| [out] | result | device or host pointer for storing contiguous batchCount results. return is 0 if n <= 0, incx<=0. |
| hipblasStatus_t hipblasIsamin | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const float * | x, | ||
| int | incx, | ||
| int * | result | ||
| ) |
BLAS Level 1 API.
amin finds the first index of the element of minimum magnitude of a vector x.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of y. |
| [in,out] | result | device pointer or host pointer to store the amin index. return is 0.0 if n, incx<=0. |
| hipblasStatus_t hipblasIsaminBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const float *const | x[], | ||
| int | incx, | ||
| int | batchCount, | ||
| int * | result | ||
| ) |
BLAS Level 1 API.
aminBatched finds the first index of the element of minimum magnitude of each vector x_i in a batch, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each vector x_i |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. incx must be > 0. |
| [in] | batchCount | [int] number of instances in the batch, must be > 0. |
| [out] | result | device or host pointers to array of batchCount size for results. return is 0 if n, incx<=0. |
| hipblasStatus_t hipblasIsaminStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const float * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount, | ||
| int * | result | ||
| ) |
BLAS Level 1 API.
aminStridedBatched finds the first index of the element of minimum magnitude of each vector x_i in a batch, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each vector x_i |
| [in] | x | device pointer to the first vector x_1. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. incx must be > 0. |
| [in] | stridex | [hipblasStride] specifies the pointer increment between one x_i and the next x_(i + 1) |
| [in] | batchCount | [int] number of instances in the batch |
| [out] | result | device or host pointer to array for storing contiguous batchCount results. return is 0 if n <= 0, incx<=0. |
| hipblasStatus_t hipblasIzamax | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| int * | result | ||
| ) |
BLAS Level 1 API.
amax finds the first index of the element of maximum magnitude of a vector x.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of y. |
| [in,out] | result | device pointer or host pointer to store the amax index. return is 0.0 if n, incx<=0. |
| hipblasStatus_t hipblasIzamaxBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasDoubleComplex *const | x[], | ||
| int | incx, | ||
| int | batchCount, | ||
| int * | result | ||
| ) |
BLAS Level 1 API.
amaxBatched finds the first index of the element of maximum magnitude of each vector x_i in a batch, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each vector x_i |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. incx must be > 0. |
| [in] | batchCount | [int] number of instances in the batch, must be > 0. |
| [out] | result | device or host array of pointers of batchCount size for results. return is 0 if n, incx<=0. |
| hipblasStatus_t hipblasIzamaxStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount, | ||
| int * | result | ||
| ) |
BLAS Level 1 API.
amaxStridedBatched finds the first index of the element of maximum magnitude of each vector x_i in a batch, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each vector x_i |
| [in] | x | device pointer to the first vector x_1. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. incx must be > 0. |
| [in] | stridex | [hipblasStride] specifies the pointer increment between one x_i and the next x_(i + 1). |
| [in] | batchCount | [int] number of instances in the batch |
| [out] | result | device or host pointer for storing contiguous batchCount results. return is 0 if n <= 0, incx<=0. |
| hipblasStatus_t hipblasIzamin | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| int * | result | ||
| ) |
BLAS Level 1 API.
amin finds the first index of the element of minimum magnitude of a vector x.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of y. |
| [in,out] | result | device pointer or host pointer to store the amin index. return is 0.0 if n, incx<=0. |
| hipblasStatus_t hipblasIzaminBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasDoubleComplex *const | x[], | ||
| int | incx, | ||
| int | batchCount, | ||
| int * | result | ||
| ) |
BLAS Level 1 API.
aminBatched finds the first index of the element of minimum magnitude of each vector x_i in a batch, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each vector x_i |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. incx must be > 0. |
| [in] | batchCount | [int] number of instances in the batch, must be > 0. |
| [out] | result | device or host pointers to array of batchCount size for results. return is 0 if n, incx<=0. |
| hipblasStatus_t hipblasIzaminStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount, | ||
| int * | result | ||
| ) |
BLAS Level 1 API.
aminStridedBatched finds the first index of the element of minimum magnitude of each vector x_i in a batch, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each vector x_i |
| [in] | x | device pointer to the first vector x_1. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. incx must be > 0. |
| [in] | stridex | [hipblasStride] specifies the pointer increment between one x_i and the next x_(i + 1) |
| [in] | batchCount | [int] number of instances in the batch |
| [out] | result | device or host pointer to array for storing contiguous batchCount results. return is 0 if n <= 0, incx<=0. |
| hipblasStatus_t hipblasNrm2BatchedEx | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const void * | x, | ||
| hipblasDatatype_t | xType, | ||
| int | incx, | ||
| int | batchCount, | ||
| void * | result, | ||
| hipblasDatatype_t | resultType, | ||
| hipblasDatatype_t | executionType | ||
| ) |
BLAS_EX API.
nrm2BatchedEx computes the euclidean norm over a batch of real or complex vectors
result := sqrt( x_i'*x_i ) for real vectors x, for i = 1, ..., batchCount
result := sqrt( x_i**H*x_i ) for complex vectors x, for i = 1, ..., batchCount
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each x_i. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | xType | [hipblasDatatype_t] specifies the datatype of each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. incx must be > 0. |
| [in] | batchCount | [int] number of instances in the batch |
| [out] | result | device pointer or host pointer to array of batchCount size for nrm2 results. return is 0.0 for each element if n <= 0, incx<=0. |
| [in] | resultType | [hipblasDatatype_t] specifies the datatype of the result. |
| [in] | executionType | [hipblasDatatype_t] specifies the datatype of computation. |
| hipblasStatus_t hipblasNrm2Ex | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const void * | x, | ||
| hipblasDatatype_t | xType, | ||
| int | incx, | ||
| void * | result, | ||
| hipblasDatatype_t | resultType, | ||
| hipblasDatatype_t | executionType | ||
| ) |
BLAS_EX API.
nrm2Ex computes the euclidean norm of a real or complex vector
result := sqrt( x'*x ) for real vectors
result := sqrt( x**H*x ) for complex vectors
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x. |
| [in] | x | device pointer storing vector x. |
| [in] | xType | [hipblasDatatype_t] specifies the datatype of the vector x. |
| [in] | incx | [int] specifies the increment for the elements of y. |
| [in,out] | result | device pointer or host pointer to store the nrm2 product. return is 0.0 if n, incx<=0. |
| [in] | resultType | [hipblasDatatype_t] specifies the datatype of the result. |
| [in] | executionType | [hipblasDatatype_t] specifies the datatype of computation. |
| hipblasStatus_t hipblasNrm2StridedBatchedEx | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const void * | x, | ||
| hipblasDatatype_t | xType, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount, | ||
| void * | result, | ||
| hipblasDatatype_t | resultType, | ||
| hipblasDatatype_t | executionType | ||
| ) |
BLAS_EX API.
nrm2StridedBatchedEx computes the euclidean norm over a batch of real or complex vectors
:= sqrt( x_i'*x_i ) for real vectors x, for i = 1, ..., batchCount
:= sqrt( x_i**H*x_i ) for complex vectors, for i = 1, ..., batchCount
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each x_i. |
| [in] | x | device pointer to the first vector x_1. |
| [in] | xType | [hipblasDatatype_t] specifies the datatype of each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. incx must be > 0. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stride_x, however the user should take care to ensure that stride_x is of appropriate size, for a typical case this means stride_x >= n * incx. |
| [in] | batchCount | [int] number of instances in the batch |
| [out] | result | device pointer or host pointer to array for storing contiguous batchCount results. return is 0.0 for each element if n <= 0, incx<=0. |
| [in] | resultType | [hipblasDatatype_t] specifies the datatype of the result. |
| [in] | executionType | [hipblasDatatype_t] specifies the datatype of computation. |
| hipblasStatus_t hipblasRotBatchedEx | ( | hipblasHandle_t | handle, |
| int | n, | ||
| void * | x, | ||
| hipblasDatatype_t | xType, | ||
| int | incx, | ||
| void * | y, | ||
| hipblasDatatype_t | yType, | ||
| int | incy, | ||
| const void * | c, | ||
| const void * | s, | ||
| hipblasDatatype_t | csType, | ||
| int | batchCount, | ||
| hipblasDatatype_t | executionType | ||
| ) |
BLAS EX API.
rotBatchedEx applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to batched vectors x_i and y_i, for i = 1, ..., batchCount. Scalars c and s may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode.
In the case where cs_type is real: x := c * x + s * y y := c * y - s * x
In the case where cs_type is complex, the imaginary part of c is ignored: x := real(c) * x + s * y y := real(c) * y - conj(s) * x
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each x_i and y_i vectors. |
| [in,out] | x | device array of deivce pointers storing each vector x_i. |
| [in] | xType | [hipblasDatatype_t] specifies the datatype of each vector x_i. |
| [in] | incx | [int] specifies the increment between elements of each x_i. |
| [in,out] | y | device array of device pointers storing each vector y_i. |
| [in] | yType | [hipblasDatatype_t] specifies the datatype of each vector y_i. |
| [in] | incy | [int] specifies the increment between elements of each y_i. |
| [in] | c | device pointer or host pointer to scalar cosine component of the rotation matrix. |
| [in] | s | device pointer or host pointer to scalar sine component of the rotation matrix. |
| [in] | csType | [hipblasDatatype_t] specifies the datatype of c and s. |
| [in] | batchCount | [int] the number of x and y arrays, i.e. the number of batches. |
| [in] | executionType | [hipblasDatatype_t] specifies the datatype of computation. |
| hipblasStatus_t hipblasRotEx | ( | hipblasHandle_t | handle, |
| int | n, | ||
| void * | x, | ||
| hipblasDatatype_t | xType, | ||
| int | incx, | ||
| void * | y, | ||
| hipblasDatatype_t | yType, | ||
| int | incy, | ||
| const void * | c, | ||
| const void * | s, | ||
| hipblasDatatype_t | csType, | ||
| hipblasDatatype_t | executionType | ||
| ) |
BLAS EX API.
rotEx applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to vectors x and y. Scalars c and s may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode.
In the case where cs_type is real: x := c * x + s * y y := c * y - s * x
In the case where cs_type is complex, the imaginary part of c is ignored: x := real(c) * x + s * y y := real(c) * y - conj(s) * x
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in the x and y vectors. |
| [in,out] | x | device pointer storing vector x. |
| [in] | xType | [hipblasDatatype_t] specifies the datatype of vector x. |
| [in] | incx | [int] specifies the increment between elements of x. |
| [in,out] | y | device pointer storing vector y. |
| [in] | yType | [hipblasDatatype_t] specifies the datatype of vector y. |
| [in] | incy | [int] specifies the increment between elements of y. |
| [in] | c | device pointer or host pointer storing scalar cosine component of the rotation matrix. |
| [in] | s | device pointer or host pointer storing scalar sine component of the rotation matrix. |
| [in] | csType | [hipblasDatatype_t] specifies the datatype of c and s. |
| [in] | executionType | [hipblasDatatype_t] specifies the datatype of computation. |
| hipblasStatus_t hipblasRotStridedBatchedEx | ( | hipblasHandle_t | handle, |
| int | n, | ||
| void * | x, | ||
| hipblasDatatype_t | xType, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| void * | y, | ||
| hipblasDatatype_t | yType, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| const void * | c, | ||
| const void * | s, | ||
| hipblasDatatype_t | csType, | ||
| int | batchCount, | ||
| hipblasDatatype_t | executionType | ||
| ) |
BLAS Level 1 API.
rotStridedBatchedEx applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to strided batched vectors x_i and y_i, for i = 1, ..., batchCount. Scalars c and s may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode.
In the case where cs_type is real: x := c * x + s * y y := c * y - s * x
In the case where cs_type is complex, the imaginary part of c is ignored: x := real(c) * x + s * y y := real(c) * y - conj(s) * x
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each x_i and y_i vectors. |
| [in,out] | x | device pointer to the first vector x_1. |
| [in] | xType | [hipblasDatatype_t] specifies the datatype of each vector x_i. |
| [in] | incx | [int] specifies the increment between elements of each x_i. |
| [in] | stridex | [hipblasStride] specifies the increment from the beginning of x_i to the beginning of x_(i+1) |
| [in,out] | y | device pointer to the first vector y_1. |
| [in] | yType | [hipblasDatatype_t] specifies the datatype of each vector y_i. |
| [in] | incy | [int] specifies the increment between elements of each y_i. |
| [in] | stridey | [hipblasStride] specifies the increment from the beginning of y_i to the beginning of y_(i+1) |
| [in] | c | device pointer or host pointer to scalar cosine component of the rotation matrix. |
| [in] | s | device pointer or host pointer to scalar sine component of the rotation matrix. |
| [in] | csType | [hipblasDatatype_t] specifies the datatype of c and s. |
| [in] | batchCount | [int] the number of x and y arrays, i.e. the number of batches. |
| [in] | executionType | [hipblasDatatype_t] specifies the datatype of computation. |
| hipblasStatus_t hipblasSasum | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const float * | x, | ||
| int | incx, | ||
| float * | result | ||
| ) |
BLAS Level 1 API.
asum computes the sum of the magnitudes of elements of a real vector x, or the sum of magnitudes of the real and imaginary parts of elements if x is a complex vector.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x and y. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. incx must be > 0. |
| [in,out] | result | device pointer or host pointer to store the asum product. return is 0.0 if n <= 0. |
| hipblasStatus_t hipblasSasumBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const float *const | x[], | ||
| int | incx, | ||
| int | batchCount, | ||
| float * | result | ||
| ) |
BLAS Level 1 API.
asumBatched computes the sum of the magnitudes of the elements in a batch of real vectors x_i, or the sum of magnitudes of the real and imaginary parts of elements if x_i is a complex vector, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each vector x_i |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. incx must be > 0. |
| [in] | batchCount | [int] number of instances in the batch. |
| [out] | result | device array or host array of batchCount size for results. return is 0.0 if n, incx<=0. |
| hipblasStatus_t hipblasSasumStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const float * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount, | ||
| float * | result | ||
| ) |
BLAS Level 1 API.
asumStridedBatched computes the sum of the magnitudes of elements of a real vectors x_i, or the sum of magnitudes of the real and imaginary parts of elements if x_i is a complex vector, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each vector x_i |
| [in] | x | device pointer to the first vector x_1. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. incx must be > 0. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stride_x, however the user should take care to ensure that stride_x is of appropriate size, for a typical case this means stride_x >= n * incx. |
| [in] | batchCount | [int] number of instances in the batch |
| [out] | result | device pointer or host pointer to array for storing contiguous batchCount results. return is 0.0 if n, incx<=0. |
| hipblasStatus_t hipblasSaxpy | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const float * | alpha, | ||
| const float * | x, | ||
| int | incx, | ||
| float * | y, | ||
| int | incy | ||
| ) |
BLAS Level 1 API.
axpy computes constant alpha multiplied by vector x, plus vector y
y := alpha * x + y
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x and y. |
| [in] | alpha | device pointer or host pointer to specify the scalar alpha. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [out] | y | device pointer storing vector y. |
| [in,out] | incy | [int] specifies the increment for the elements of y. |
| hipblasStatus_t hipblasSaxpyBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const float * | alpha, | ||
| const float *const | x[], | ||
| int | incx, | ||
| float *const | y[], | ||
| int | incy, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
axpyBatched compute y := alpha * x + y over a set of batched vectors.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x and y. |
| [in] | alpha | specifies the scalar alpha. |
| [in] | x | pointer storing vector x on the GPU. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [out] | y | pointer storing vector y on the GPU. |
| [in,out] | incy | [int] specifies the increment for the elements of y. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasSaxpyStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const float * | alpha, | ||
| const float * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| float * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
axpyStridedBatched compute y := alpha * x + y over a set of strided batched vectors.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] |
| [in] | alpha | specifies the scalar alpha. |
| [in] | x | pointer storing vector x on the GPU. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | stridex | [hipblasStride] specifies the increment between vectors of x. |
| [out] | y | pointer storing vector y on the GPU. |
| [in,out] | incy | [int] specifies the increment for the elements of y. |
| [in] | stridey | [hipblasStride] specifies the increment between vectors of y. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasScalBatchedEx | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const void * | alpha, | ||
| hipblasDatatype_t | alphaType, | ||
| void * | x, | ||
| hipblasDatatype_t | xType, | ||
| int | incx, | ||
| int | batchCount, | ||
| hipblasDatatype_t | executionType | ||
| ) |
BLAS EX API.
scalBatchedEx scales each element of each vector x_i with scalar alpha.
x_i := alpha * x_i
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x. |
| [in] | alpha | device pointer or host pointer for the scalar alpha. |
| [in] | alphaType | [hipblasDatatype_t] specifies the datatype of alpha. |
| [in,out] | x | device array of device pointers storing each vector x_i. |
| [in] | xType | [hipblasDatatype_t] specifies the datatype of each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | batchCount | [int] number of instances in the batch. |
| [in] | executionType | [hipblasDatatype_t] specifies the datatype of computation. |
| hipblasStatus_t hipblasScalEx | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const void * | alpha, | ||
| hipblasDatatype_t | alphaType, | ||
| void * | x, | ||
| hipblasDatatype_t | xType, | ||
| int | incx, | ||
| hipblasDatatype_t | executionType | ||
| ) |
BLAS EX API.
scalEx scales each element of vector x with scalar alpha.
x := alpha * x
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x. |
| [in] | alpha | device pointer or host pointer for the scalar alpha. |
| [in] | alphaType | [hipblasDatatype_t] specifies the datatype of alpha. |
| [in,out] | x | device pointer storing vector x. |
| [in] | xType | [hipblasDatatype_t] specifies the datatype of vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | executionType | [hipblasDatatype_t] specifies the datatype of computation. |
| hipblasStatus_t hipblasScalStridedBatchedEx | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const void * | alpha, | ||
| hipblasDatatype_t | alphaType, | ||
| void * | x, | ||
| hipblasDatatype_t | xType, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount, | ||
| hipblasDatatype_t | executionType | ||
| ) |
BLAS EX API.
scalStridedBatchedEx scales each element of vector x with scalar alpha over a set of strided batched vectors.
x := alpha * x
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x. |
| [in] | alpha | device pointer or host pointer for the scalar alpha. |
| [in] | alphaType | [hipblasDatatype_t] specifies the datatype of alpha. |
| [in,out] | x | device pointer to the first vector x_1. |
| [in] | xType | [hipblasDatatype_t] specifies the datatype of each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) to the next one (x_i+1). There are no restrictions placed on stridex, however the user should take care to ensure that stridex is of appropriate size, for a typical case this means stridex >= n * incx. |
| [in] | batchCount | [int] number of instances in the batch. |
| [in] | executionType | [hipblasDatatype_t] specifies the datatype of computation. |
| hipblasStatus_t hipblasScasum | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| float * | result | ||
| ) |
BLAS Level 1 API.
asum computes the sum of the magnitudes of elements of a real vector x, or the sum of magnitudes of the real and imaginary parts of elements if x is a complex vector.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x and y. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. incx must be > 0. |
| [in,out] | result | device pointer or host pointer to store the asum product. return is 0.0 if n <= 0. |
| hipblasStatus_t hipblasScasumBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasComplex *const | x[], | ||
| int | incx, | ||
| int | batchCount, | ||
| float * | result | ||
| ) |
BLAS Level 1 API.
asumBatched computes the sum of the magnitudes of the elements in a batch of real vectors x_i, or the sum of magnitudes of the real and imaginary parts of elements if x_i is a complex vector, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each vector x_i |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. incx must be > 0. |
| [in] | batchCount | [int] number of instances in the batch. |
| [out] | result | device array or host array of batchCount size for results. return is 0.0 if n, incx<=0. |
| hipblasStatus_t hipblasScasumStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount, | ||
| float * | result | ||
| ) |
BLAS Level 1 API.
asumStridedBatched computes the sum of the magnitudes of elements of a real vectors x_i, or the sum of magnitudes of the real and imaginary parts of elements if x_i is a complex vector, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each vector x_i |
| [in] | x | device pointer to the first vector x_1. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. incx must be > 0. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stride_x, however the user should take care to ensure that stride_x is of appropriate size, for a typical case this means stride_x >= n * incx. |
| [in] | batchCount | [int] number of instances in the batch |
| [out] | result | device pointer or host pointer to array for storing contiguous batchCount results. return is 0.0 if n, incx<=0. |
| hipblasStatus_t hipblasScnrm2 | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| float * | result | ||
| ) |
BLAS Level 1 API.
nrm2 computes the euclidean norm of a real or complex vector
result := sqrt( x'*x ) for real vectors
result := sqrt( x**H*x ) for complex vectors
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of y. |
| [in,out] | result | device pointer or host pointer to store the nrm2 product. return is 0.0 if n, incx<=0. |
| hipblasStatus_t hipblasScnrm2Batched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasComplex *const | x[], | ||
| int | incx, | ||
| int | batchCount, | ||
| float * | result | ||
| ) |
BLAS Level 1 API.
nrm2Batched computes the euclidean norm over a batch of real or complex vectors
result := sqrt( x_i'*x_i ) for real vectors x, for i = 1, ..., batchCount
result := sqrt( x_i**H*x_i ) for complex vectors x, for i = 1, ..., batchCount
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each x_i. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. incx must be > 0. |
| [in] | batchCount | [int] number of instances in the batch |
| [out] | result | device pointer or host pointer to array of batchCount size for nrm2 results. return is 0.0 for each element if n <= 0, incx<=0. |
| hipblasStatus_t hipblasScnrm2StridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount, | ||
| float * | result | ||
| ) |
BLAS Level 1 API.
nrm2StridedBatched computes the euclidean norm over a batch of real or complex vectors
:= sqrt( x_i'*x_i ) for real vectors x, for i = 1, ..., batchCount
:= sqrt( x_i**H*x_i ) for complex vectors, for i = 1, ..., batchCount
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each x_i. |
| [in] | x | device pointer to the first vector x_1. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. incx must be > 0. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stride_x, however the user should take care to ensure that stride_x is of appropriate size, for a typical case this means stride_x >= n * incx. |
| [in] | batchCount | [int] number of instances in the batch |
| [out] | result | device pointer or host pointer to array for storing contiguous batchCount results. return is 0.0 for each element if n <= 0, incx<=0. |
| hipblasStatus_t hipblasScopy | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const float * | x, | ||
| int | incx, | ||
| float * | y, | ||
| int | incy | ||
| ) |
BLAS Level 1 API.
copy copies each element x[i] into y[i], for i = 1 , ... , n
y := x,
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x to be copied to y. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [out] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| hipblasStatus_t hipblasScopyBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const float *const | x[], | ||
| int | incx, | ||
| float *const | y[], | ||
| int | incy, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
copyBatched copies each element x_i[j] into y_i[j], for j = 1 , ... , n; i = 1 , ... , batchCount
y_i := x_i,
where (x_i, y_i) is the i-th instance of the batch. x_i and y_i are vectors.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i to be copied to y_i. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each vector x_i. |
| [out] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each vector y_i. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasScopyStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const float * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| float * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
copyStridedBatched copies each element x_i[j] into y_i[j], for j = 1 , ... , n; i = 1 , ... , batchCount
y_i := x_i,
where (x_i, y_i) is the i-th instance of the batch. x_i and y_i are vectors.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i to be copied to y_i. |
| [in] | x | device pointer to the first vector (x_1) in the batch. |
| [in] | incx | [int] specifies the increments for the elements of vectors x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stride_x, however the user should take care to ensure that stride_x is of appropriate size, for a typical case this means stride_x >= n * incx. |
| [out] | y | device pointer to the first vector (y_1) in the batch. |
| [in] | incy | [int] specifies the increment for the elements of vectors y_i. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1). There are no restrictions placed on stride_y, however the user should take care to ensure that stride_y is of appropriate size, for a typical case this means stride_y >= n * incy. stridey should be non zero. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasSdgmm | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| int | m, | ||
| int | n, | ||
| const float * | AP, | ||
| int | lda, | ||
| const float * | x, | ||
| int | incx, | ||
| float * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
dgmm performs one of the matrix-matrix operations
C = A * diag(x) if side == HIPBLAS_SIDE_RIGHT C = diag(x) * A if side == HIPBLAS_SIDE_LEFT
where C and A are m by n dimensional matrices. diag( x ) is a diagonal matrix and x is vector of dimension n if side == HIPBLAS_SIDE_RIGHT and dimension m if side == HIPBLAS_SIDE_LEFT.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] specifies the side of diag(x) |
| [in] | m | [int] matrix dimension m. |
| [in] | n | [int] matrix dimension n. |
| [in] | AP | device pointer storing matrix A. |
| [in] | lda | [int] specifies the leading dimension of A. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment between values of x |
| [in,out] | CP | device pointer storing matrix C. |
| [in] | ldc | [int] specifies the leading dimension of C. |
| hipblasStatus_t hipblasSdgmmBatched | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| int | m, | ||
| int | n, | ||
| const float *const | AP[], | ||
| int | lda, | ||
| const float *const | x[], | ||
| int | incx, | ||
| float *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
dgmmBatched performs one of the batched matrix-matrix operations
C_i = A_i * diag(x_i) for i = 0, 1, ... batchCount-1 if side == HIPBLAS_SIDE_RIGHT C_i = diag(x_i) * A_i for i = 0, 1, ... batchCount-1 if side == HIPBLAS_SIDE_LEFT
where C_i and A_i are m by n dimensional matrices. diag(x_i) is a diagonal matrix and x_i is vector of dimension n if side == HIPBLAS_SIDE_RIGHT and dimension m if side == HIPBLAS_SIDE_LEFT.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] specifies the side of diag(x) |
| [in] | m | [int] matrix dimension m. |
| [in] | n | [int] matrix dimension n. |
| [in] | AP | device array of device pointers storing each matrix A_i on the GPU. Each A_i is of dimension ( lda, n ) |
| [in] | lda | [int] specifies the leading dimension of A_i. |
| [in] | x | device array of device pointers storing each vector x_i on the GPU. Each x_i is of dimension n if side == HIPBLAS_SIDE_RIGHT and dimension m if side == HIPBLAS_SIDE_LEFT |
| [in] | incx | [int] specifies the increment between values of x_i |
| [in,out] | CP | device array of device pointers storing each matrix C_i on the GPU. Each C_i is of dimension ( ldc, n ). |
| [in] | ldc | [int] specifies the leading dimension of C_i. |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasSdgmmStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| int | m, | ||
| int | n, | ||
| const float * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const float * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| float * | CP, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
dgmmStridedBatched performs one of the batched matrix-matrix operations
C_i = A_i * diag(x_i) if side == HIPBLAS_SIDE_RIGHT for i = 0, 1, ... batchCount-1 C_i = diag(x_i) * A_i if side == HIPBLAS_SIDE_LEFT for i = 0, 1, ... batchCount-1
where C_i and A_i are m by n dimensional matrices. diag(x_i) is a diagonal matrix and x_i is vector of dimension n if side == HIPBLAS_SIDE_RIGHT and dimension m if side == HIPBLAS_SIDE_LEFT.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] specifies the side of diag(x) |
| [in] | m | [int] matrix dimension m. |
| [in] | n | [int] matrix dimension n. |
| [in] | AP | device pointer to the first matrix A_0 on the GPU. Each A_i is of dimension ( lda, n ) |
| [in] | lda | [int] specifies the leading dimension of A. |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | x | pointer to the first vector x_0 on the GPU. Each x_i is of dimension n if side == HIPBLAS_SIDE_RIGHT and dimension m if side == HIPBLAS_SIDE_LEFT |
| [in] | incx | [int] specifies the increment between values of x |
| [in] | stridex | [hipblasStride] stride from the start of one vector(x_i) and the next one (x_i+1) |
| [in,out] | CP | device pointer to the first matrix C_0 on the GPU. Each C_i is of dimension ( ldc, n ). |
| [in] | ldc | [int] specifies the leading dimension of C. |
| [in] | strideC | [hipblasStride] stride from the start of one matrix (C_i) and the next one (C_i+1) |
| [in] | batchCount | [int] number of instances i in the batch. |
| hipblasStatus_t hipblasSdot | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const float * | x, | ||
| int | incx, | ||
| const float * | y, | ||
| int | incy, | ||
| float * | result | ||
| ) |
BLAS Level 1 API.
dot(u) performs the dot product of vectors x and y
result = x * y;
dotc performs the dot product of the conjugate of complex vector x and complex vector y
result = conjugate (x) * y;
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x and y. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of y. |
| [in] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in,out] | result | device pointer or host pointer to store the dot product. return is 0.0 if n <= 0. |
| hipblasStatus_t hipblasSdotBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const float *const | x[], | ||
| int | incx, | ||
| const float *const | y[], | ||
| int | incy, | ||
| int | batchCount, | ||
| float * | result | ||
| ) |
BLAS Level 1 API.
dotBatched(u) performs a batch of dot products of vectors x and y
result_i = x_i * y_i;
dotcBatched performs a batch of dot products of the conjugate of complex vector x and complex vector y
result_i = conjugate (x_i) * y_i;
where (x_i, y_i) is the i-th instance of the batch. x_i and y_i are vectors, for i = 1, ..., batchCount
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i and y_i. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | batchCount | [int] number of instances in the batch |
| [in,out] | result | device array or host array of batchCount size to store the dot products of each batch. return 0.0 for each element if n <= 0. |
| hipblasStatus_t hipblasSdotStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const float * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const float * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount, | ||
| float * | result | ||
| ) |
BLAS Level 1 API.
dotStridedBatched(u) performs a batch of dot products of vectors x and y
result_i = x_i * y_i;
dotcStridedBatched performs a batch of dot products of the conjugate of complex vector x and complex vector y
result_i = conjugate (x_i) * y_i;
where (x_i, y_i) is the i-th instance of the batch. x_i and y_i are vectors, for i = 1, ..., batchCount
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i and y_i. |
| [in] | x | device pointer to the first vector (x_1) in the batch. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1) |
| [in] | y | device pointer to the first vector (y_1) in the batch. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1) |
| [in] | batchCount | [int] number of instances in the batch |
| [in,out] | result | device array or host array of batchCount size to store the dot products of each batch. return 0.0 for each element if n <= 0. |
| hipblasStatus_t hipblasSetAtomicsMode | ( | hipblasHandle_t | handle, |
| hipblasAtomicsMode_t | atomics_mode | ||
| ) |
Set hipblasSetAtomicsMode.
| hipblasStatus_t hipblasSetMatrix | ( | int | rows, |
| int | cols, | ||
| int | elemSize, | ||
| const void * | AP, | ||
| int | lda, | ||
| void * | BP, | ||
| int | ldb | ||
| ) |
copy matrix from host to device
| [in] | rows | [int] number of rows in matrices |
| [in] | cols | [int] number of columns in matrices |
| [in] | elemSize | [int] number of bytes per element in the matrix |
| [in] | AP | pointer to matrix on the host |
| [in] | lda | [int] specifies the leading dimension of A, lda >= rows |
| [out] | BP | pointer to matrix on the GPU |
| [in] | ldb | [int] specifies the leading dimension of B, ldb >= rows |
| hipblasStatus_t hipblasSetMatrixAsync | ( | int | rows, |
| int | cols, | ||
| int | elemSize, | ||
| const void * | AP, | ||
| int | lda, | ||
| void * | BP, | ||
| int | ldb, | ||
| hipStream_t | stream | ||
| ) |
asynchronously copy matrix from host to device
hipblasSetMatrixAsync copies a matrix from pinned host memory to device memory asynchronously. Memory on the host must be allocated with hipHostMalloc or the transfer will be synchronous.
| [in] | rows | [int] number of rows in matrices |
| [in] | cols | [int] number of columns in matrices |
| [in] | elemSize | [int] number of bytes per element in the matrix |
| [in] | AP | pointer to matrix on the host |
| [in] | lda | [int] specifies the leading dimension of A, lda >= rows |
| [out] | BP | pointer to matrix on the GPU |
| [in] | ldb | [int] specifies the leading dimension of B, ldb >= rows |
| [in] | stream | specifies the stream into which this transfer request is queued |
| hipblasStatus_t hipblasSetPointerMode | ( | hipblasHandle_t | handle, |
| hipblasPointerMode_t | mode | ||
| ) |
Set hipblas pointer mode.
| hipblasStatus_t hipblasSetStream | ( | hipblasHandle_t | handle, |
| hipStream_t | streamId | ||
| ) |
Set stream for handle.
| hipblasStatus_t hipblasSetVector | ( | int | n, |
| int | elemSize, | ||
| const void * | x, | ||
| int | incx, | ||
| void * | y, | ||
| int | incy | ||
| ) |
copy vector from host to device
| [in] | n | [int] number of elements in the vector |
| [in] | elemSize | [int] Size of both vectors in bytes |
| [in] | x | pointer to vector on the host |
| [in] | incx | [int] specifies the increment for the elements of the vector |
| [out] | y | pointer to vector on the device |
| [in] | incy | [int] specifies the increment for the elements of the vector |
| hipblasStatus_t hipblasSetVectorAsync | ( | int | n, |
| int | elemSize, | ||
| const void * | x, | ||
| int | incx, | ||
| void * | y, | ||
| int | incy, | ||
| hipStream_t | stream | ||
| ) |
asynchronously copy vector from host to device
hipblasSetVectorAsync copies a vector from pinned host memory to device memory asynchronously. Memory on the host must be allocated with hipHostMalloc or the transfer will be synchronous.
| [in] | n | [int] number of elements in the vector |
| [in] | elemSize | [int] number of bytes per element in the matrix |
| [in] | x | pointer to vector on the host |
| [in] | incx | [int] specifies the increment for the elements of the vector |
| [out] | y | pointer to vector on the device |
| [in] | incy | [int] specifies the increment for the elements of the vector |
| [in] | stream | specifies the stream into which this transfer request is queued |
| hipblasStatus_t hipblasSgbmv | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | trans, | ||
| int | m, | ||
| int | n, | ||
| int | kl, | ||
| int | ku, | ||
| const float * | alpha, | ||
| const float * | AP, | ||
| int | lda, | ||
| const float * | x, | ||
| int | incx, | ||
| const float * | beta, | ||
| float * | y, | ||
| int | incy | ||
| ) |
BLAS Level 2 API.
gbmv performs one of the matrix-vector operations
y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y, or y := alpha*A**H*x + beta*y,
where alpha and beta are scalars, x and y are vectors and A is an m by n banded matrix with kl sub-diagonals and ku super-diagonals.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | trans | [hipblasOperation_t] indicates whether matrix A is tranposed (conjugated) or not |
| [in] | m | [int] number of rows of matrix A |
| [in] | n | [int] number of columns of matrix A |
| [in] | kl | [int] number of sub-diagonals of A |
| [in] | ku | [int] number of super-diagonals of A |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device pointer storing banded matrix A. Leading (kl + ku + 1) by n part of the matrix contains the coefficients of the banded matrix. The leading diagonal resides in row (ku + 1) with the first super-diagonal above on the RHS of row ku. The first sub-diagonal resides below on the LHS of row ku + 2. This propogates up and down across sub/super-diagonals. Ex: (m = n = 7; ku = 2, kl = 2) 1 2 3 0 0 0 0 0 0 3 3 3 3 3 4 1 2 3 0 0 0 0 2 2 2 2 2 2 5 4 1 2 3 0 0 -—> 1 1 1 1 1 1 1 0 5 4 1 2 3 0 4 4 4 4 4 4 0 0 0 5 4 1 2 0 5 5 5 5 5 0 0 0 0 0 5 4 1 2 0 0 0 0 0 0 0 0 0 0 0 5 4 1 0 0 0 0 0 0 0 Note that the empty elements which don't correspond to data will not be referenced. |
| [in] | lda | [int] specifies the leading dimension of A. Must be >= (kl + ku + 1) |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| hipblasStatus_t hipblasSgbmvBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | trans, | ||
| int | m, | ||
| int | n, | ||
| int | kl, | ||
| int | ku, | ||
| const float * | alpha, | ||
| const float *const | AP[], | ||
| int | lda, | ||
| const float *const | x[], | ||
| int | incx, | ||
| const float * | beta, | ||
| float *const | y[], | ||
| int | incy, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
gbmvBatched performs one of the matrix-vector operations
y_i := alpha*A_i*x_i + beta*y_i, or y_i := alpha*A_i**T*x_i + beta*y_i, or y_i := alpha*A_i**H*x_i + beta*y_i,
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha and beta are scalars, x_i and y_i are vectors and A_i is an m by n banded matrix with kl sub-diagonals and ku super-diagonals, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | trans | [hipblasOperation_t] indicates whether matrix A is tranposed (conjugated) or not |
| [in] | m | [int] number of rows of each matrix A_i |
| [in] | n | [int] number of columns of each matrix A_i |
| [in] | kl | [int] number of sub-diagonals of each A_i |
| [in] | ku | [int] number of super-diagonals of each A_i |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device array of device pointers storing each banded matrix A_i. Leading (kl + ku + 1) by n part of the matrix contains the coefficients of the banded matrix. The leading diagonal resides in row (ku + 1) with the first super-diagonal above on the RHS of row ku. The first sub-diagonal resides below on the LHS of row ku + 2. This propogates up and down across sub/super-diagonals. Ex: (m = n = 7; ku = 2, kl = 2) 1 2 3 0 0 0 0 0 0 3 3 3 3 3 4 1 2 3 0 0 0 0 2 2 2 2 2 2 5 4 1 2 3 0 0 -—> 1 1 1 1 1 1 1 0 5 4 1 2 3 0 4 4 4 4 4 4 0 0 0 5 4 1 2 0 5 5 5 5 5 0 0 0 0 0 5 4 1 2 0 0 0 0 0 0 0 0 0 0 0 5 4 1 0 0 0 0 0 0 0 Note that the empty elements which don't correspond to data will not be referenced. |
| [in] | lda | [int] specifies the leading dimension of each A_i. Must be >= (kl + ku + 1) |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | batchCount | [int] specifies the number of instances in the batch. |
| hipblasStatus_t hipblasSgbmvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | trans, | ||
| int | m, | ||
| int | n, | ||
| int | kl, | ||
| int | ku, | ||
| const float * | alpha, | ||
| const float * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const float * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const float * | beta, | ||
| float * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
gbmvStridedBatched performs one of the matrix-vector operations
y_i := alpha*A_i*x_i + beta*y_i, or y_i := alpha*A_i**T*x_i + beta*y_i, or y_i := alpha*A_i**H*x_i + beta*y_i,
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha and beta are scalars, x_i and y_i are vectors and A_i is an m by n banded matrix with kl sub-diagonals and ku super-diagonals, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | trans | [hipblasOperation_t] indicates whether matrix A is tranposed (conjugated) or not |
| [in] | m | [int] number of rows of matrix A |
| [in] | n | [int] number of columns of matrix A |
| [in] | kl | [int] number of sub-diagonals of A |
| [in] | ku | [int] number of super-diagonals of A |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device pointer to first banded matrix (A_1). Leading (kl + ku + 1) by n part of the matrix contains the coefficients of the banded matrix. The leading diagonal resides in row (ku + 1) with the first super-diagonal above on the RHS of row ku. The first sub-diagonal resides below on the LHS of row ku + 2. This propogates up and down across sub/super-diagonals. Ex: (m = n = 7; ku = 2, kl = 2) 1 2 3 0 0 0 0 0 0 3 3 3 3 3 4 1 2 3 0 0 0 0 2 2 2 2 2 2 5 4 1 2 3 0 0 -—> 1 1 1 1 1 1 1 0 5 4 1 2 3 0 4 4 4 4 4 4 0 0 0 5 4 1 2 0 5 5 5 5 5 0 0 0 0 0 5 4 1 2 0 0 0 0 0 0 0 0 0 0 0 5 4 1 0 0 0 0 0 0 0 Note that the empty elements which don't correspond to data will not be referenced. |
| [in] | lda | [int] specifies the leading dimension of A. Must be >= (kl + ku + 1) |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | x | device pointer to first vector (x_1). |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1) |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device pointer to first vector (y_1). |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (x_i+1) |
| [in] | batchCount | [int] specifies the number of instances in the batch. |
| hipblasStatus_t hipblasSgeam | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | transA, | ||
| hipblasOperation_t | transB, | ||
| int | m, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float * | AP, | ||
| int | lda, | ||
| const float * | beta, | ||
| const float * | BP, | ||
| int | ldb, | ||
| float * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
geam performs one of the matrix-matrix operations
C = alpha*op( A ) + beta*op( B ),
where op( X ) is one of
op( X ) = X or op( X ) = X**T or op( X ) = X**H,
alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by n matrix, op( B ) an m by n matrix, and C an m by n matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | transA | [hipblasOperation_t] specifies the form of op( A ) |
| [in] | transB | [hipblasOperation_t] specifies the form of op( B ) |
| [in] | m | [int] matrix dimension m. |
| [in] | n | [int] matrix dimension n. |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. |
| [in] | AP | device pointer storing matrix A. |
| [in] | lda | [int] specifies the leading dimension of A. |
| [in] | beta | device pointer or host pointer specifying the scalar beta. |
| [in] | BP | device pointer storing matrix B. |
| [in] | ldb | [int] specifies the leading dimension of B. |
| [in,out] | CP | device pointer storing matrix C. |
| [in] | ldc | [int] specifies the leading dimension of C. |
| hipblasStatus_t hipblasSgeamBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | transA, | ||
| hipblasOperation_t | transB, | ||
| int | m, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float *const | AP[], | ||
| int | lda, | ||
| const float * | beta, | ||
| const float *const | BP[], | ||
| int | ldb, | ||
| float *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
geamBatched performs one of the batched matrix-matrix operations
C_i = alpha*op( A_i ) + beta*op( B_i ) for i = 0, 1, ... batchCount - 1
where alpha and beta are scalars, and op(A_i), op(B_i) and C_i are m by n matrices and op( X ) is one of
op( X ) = X or op( X ) = X**T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | transA | [hipblasOperation_t] specifies the form of op( A ) |
| [in] | transB | [hipblasOperation_t] specifies the form of op( B ) |
| [in] | m | [int] matrix dimension m. |
| [in] | n | [int] matrix dimension n. |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. |
| [in] | AP | device array of device pointers storing each matrix A_i on the GPU. Each A_i is of dimension ( lda, k ), where k is m when transA == HIPBLAS_OP_N and is n when transA == HIPBLAS_OP_T. |
| [in] | lda | [int] specifies the leading dimension of A. |
| [in] | beta | device pointer or host pointer specifying the scalar beta. |
| [in] | BP | device array of device pointers storing each matrix B_i on the GPU. Each B_i is of dimension ( ldb, k ), where k is m when transB == HIPBLAS_OP_N and is n when transB == HIPBLAS_OP_T. |
| [in] | ldb | [int] specifies the leading dimension of B. |
| [in,out] | CP | device array of device pointers storing each matrix C_i on the GPU. Each C_i is of dimension ( ldc, n ). |
| [in] | ldc | [int] specifies the leading dimension of C. |
| [in] | batchCount | [int] number of instances i in the batch. |
| hipblasStatus_t hipblasSgeamStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | transA, | ||
| hipblasOperation_t | transB, | ||
| int | m, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const float * | beta, | ||
| const float * | BP, | ||
| int | ldb, | ||
| hipblasStride | strideB, | ||
| float * | CP, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
geamStridedBatched performs one of the batched matrix-matrix operations
C_i = alpha*op( A_i ) + beta*op( B_i ) for i = 0, 1, ... batchCount - 1
where alpha and beta are scalars, and op(A_i), op(B_i) and C_i are m by n matrices and op( X ) is one of
op( X ) = X or op( X ) = X**T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | transA | [hipblasOperation_t] specifies the form of op( A ) |
| [in] | transB | [hipblasOperation_t] specifies the form of op( B ) |
| [in] | m | [int] matrix dimension m. |
| [in] | n | [int] matrix dimension n. |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. |
| [in] | AP | device pointer to the first matrix A_0 on the GPU. Each A_i is of dimension ( lda, k ), where k is m when transA == HIPBLAS_OP_N and is n when transA == HIPBLAS_OP_T. |
| [in] | lda | [int] specifies the leading dimension of A. |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | beta | device pointer or host pointer specifying the scalar beta. |
| [in] | BP | pointer to the first matrix B_0 on the GPU. Each B_i is of dimension ( ldb, k ), where k is m when transB == HIPBLAS_OP_N and is n when transB == HIPBLAS_OP_T. |
| [in] | ldb | [int] specifies the leading dimension of B. |
| [in] | strideB | [hipblasStride] stride from the start of one matrix (B_i) and the next one (B_i+1) |
| [in,out] | CP | pointer to the first matrix C_0 on the GPU. Each C_i is of dimension ( ldc, n ). |
| [in] | ldc | [int] specifies the leading dimension of C. |
| [in] | strideC | [hipblasStride] stride from the start of one matrix (C_i) and the next one (C_i+1) |
| [in] | batchCount | [int] number of instances i in the batch. |
| hipblasStatus_t hipblasSgels | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | trans, | ||
| const int | m, | ||
| const int | n, | ||
| const int | nrhs, | ||
| float * | A, | ||
| const int | lda, | ||
| float * | B, | ||
| const int | ldb, | ||
| int * | info, | ||
| int * | deviceInfo | ||
| ) |
GELS solves an overdetermined (or underdetermined) linear system defined by an m-by-n matrix A, and a corresponding matrix B, using the QR factorization computed by GEQRF (or the LQ factorization computed by "GELQF").
Depending on the value of trans, the problem solved by this function is either of the form
\[ \begin{array}{cl} A X = B & \: \text{not transposed, or}\\ A' X = B & \: \text{transposed if real, or conjugate transposed if complex} \end{array} \]
If m >= n (or m < n in the case of transpose/conjugate transpose), the system is overdetermined and a least-squares solution approximating X is found by minimizing
\[ || B - A X || \quad \text{(or} \: || B - A' X ||\text{)} \]
If m < n (or m >= n in the case of transpose/conjugate transpose), the system is underdetermined and a unique solution for X is chosen such that \(|| X ||\) is minimal.
| [in] | handle | hipblasHandle_t. |
| [in] | trans | hipblasOperation_t. Specifies the form of the system of equations. |
| [in] | m | int. m >= 0. The number of rows of matrix A. |
| [in] | n | int. n >= 0. The number of columns of matrix A. |
| [in] | nrhs | int. nrhs >= 0. The number of columns of matrices B and X; i.e., the columns on the right hand side. |
| [in,out] | A | pointer to type. Array on the GPU of dimension lda*n. On entry, the matrix A. On exit, the QR (or LQ) factorization of A as returned by "GEQRF" (or "GELQF"). |
| [in] | lda | int. lda >= m. Specifies the leading dimension of matrix A. |
| [in,out] | B | pointer to type. Array on the GPU of dimension ldb*nrhs. On entry, the matrix B. On exit, when info = 0, B is overwritten by the solution vectors (and the residuals in the overdetermined cases) stored as columns. |
| [in] | ldb | int. ldb >= max(m,n). Specifies the leading dimension of matrix B. |
| [out] | info | pointer to an int on the host. If info = 0, successful exit. If info = j < 0, the argument at position -j is invalid. |
| [out] | deviceInfo | pointer to int on the GPU. If info = 0, successful exit. If info = i > 0, the solution could not be computed because input matrix A is rank deficient; the i-th diagonal element of its triangular factor is zero. |
| hipblasStatus_t hipblasSgelsBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | trans, | ||
| const int | m, | ||
| const int | n, | ||
| const int | nrhs, | ||
| float *const | A[], | ||
| const int | lda, | ||
| float *const | B[], | ||
| const int | ldb, | ||
| int * | info, | ||
| int * | deviceInfo, | ||
| const int | batchCount | ||
| ) |
gelsBatched solves a batch of overdetermined (or underdetermined) linear systems defined by a set of m-by-n matrices \(A_j\), and corresponding matrices \(B_j\), using the QR factorizations computed by "GEQRF_BATCHED" (or the LQ factorizations computed by "GELQF_BATCHED").
For each instance in the batch, depending on the value of trans, the problem solved by this function is either of the form
\[ \begin{array}{cl} A_j X_j = B_j & \: \text{not transposed, or}\\ A_j' X_j = B_j & \: \text{transposed if real, or conjugate transposed if complex} \end{array} \]
If m >= n (or m < n in the case of transpose/conjugate transpose), the system is overdetermined and a least-squares solution approximating X_j is found by minimizing
\[ || B_j - A_j X_j || \quad \text{(or} \: || B_j - A_j' X_j ||\text{)} \]
If m < n (or m >= n in the case of transpose/conjugate transpose), the system is underdetermined and a unique solution for X_j is chosen such that \(|| X_j ||\) is minimal.
| [in] | handle | hipblasHandle_t. |
| [in] | trans | hipblasOperation_t. Specifies the form of the system of equations. |
| [in] | m | int. m >= 0. The number of rows of all matrices A_j in the batch. |
| [in] | n | int. n >= 0. The number of columns of all matrices A_j in the batch. |
| [in] | nrhs | int. nrhs >= 0. The number of columns of all matrices B_j and X_j in the batch; i.e., the columns on the right hand side. |
| [in,out] | A | array of pointer to type. Each pointer points to an array on the GPU of dimension lda*n. On entry, the matrices A_j. On exit, the QR (or LQ) factorizations of A_j as returned by "GEQRF_BATCHED" (or "GELQF_BATCHED"). |
| [in] | lda | int. lda >= m. Specifies the leading dimension of matrices A_j. |
| [in,out] | B | array of pointer to type. Each pointer points to an array on the GPU of dimension ldb*nrhs. On entry, the matrices B_j. On exit, when info[j] = 0, B_j is overwritten by the solution vectors (and the residuals in the overdetermined cases) stored as columns. |
| [in] | ldb | int. ldb >= max(m,n). Specifies the leading dimension of matrices B_j. |
| [out] | info | pointer to an int on the host. If info = 0, successful exit. If info = j < 0, the argument at position -j is invalid. |
| [out] | deviceInfo | pointer to int. Array of batchCount integers on the GPU. If deviceInfo[j] = 0, successful exit for solution of A_j. If deviceInfo[j] = i > 0, the solution of A_j could not be computed because input matrix A_j is rank deficient; the i-th diagonal element of its triangular factor is zero. |
| [in] | batchCount | int. batchCount >= 0. Number of matrices in the batch. |
| hipblasStatus_t hipblasSgelsStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | trans, | ||
| const int | m, | ||
| const int | n, | ||
| const int | nrhs, | ||
| float * | A, | ||
| const int | lda, | ||
| const hipblasStride | strideA, | ||
| float * | B, | ||
| const int | ldb, | ||
| const hipblasStride | strideB, | ||
| int * | info, | ||
| int * | deviceInfo, | ||
| const int | batch_count | ||
| ) |
gelsStridedBatched solves a batch of overdetermined (or underdetermined) linear systems defined by a set of m-by-n matrices \(A_j\), and corresponding matrices \(B_j\), using the QR factorizations computed by "GEQRF_STRIDED_BATCHED" (or the LQ factorizations computed by "GELQF_STRIDED_BATCHED").
For each instance in the batch, depending on the value of trans, the problem solved by this function is either of the form
\[ \begin{array}{cl} A_j X_j = B_j & \: \text{not transposed, or}\\ A_j' X_j = B_j & \: \text{transposed if real, or conjugate transposed if complex} \end{array} \]
If m >= n (or m < n in the case of transpose/conjugate transpose), the system is overdetermined and a least-squares solution approximating X_j is found by minimizing
\[ || B_j - A_j X_j || \quad \text{(or} \: || B_j - A_j' X_j ||\text{)} \]
If m < n (or m >= n in the case of transpose/conjugate transpose), the system is underdetermined and a unique solution for X_j is chosen such that \(|| X_j ||\) is minimal.
| [in] | handle | hipblasHandle_t. |
| [in] | trans | hipblasOperation_t. Specifies the form of the system of equations. |
| [in] | m | int. m >= 0. The number of rows of all matrices A_j in the batch. |
| [in] | n | int. n >= 0. The number of columns of all matrices A_j in the batch. |
| [in] | nrhs | int. nrhs >= 0. The number of columns of all matrices B_j and X_j in the batch; i.e., the columns on the right hand side. |
| [in,out] | A | pointer to type. Array on the GPU (the size depends on the value of strideA). On entry, the matrices A_j. On exit, the QR (or LQ) factorizations of A_j as returned by "GEQRF_STRIDED_BATCHED" (or "GELQF_STRIDED_BATCHED"). |
| [in] | lda | int. lda >= m. Specifies the leading dimension of matrices A_j. |
| [in] | strideA | hipblasStride. Stride from the start of one matrix A_j to the next one A_(j+1). There is no restriction for the value of strideA. Normal use case is strideA >= lda*n |
| [in,out] | B | pointer to type. Array on the GPU (the size depends on the value of strideB). On entry, the matrices B_j. On exit, when info[j] = 0, each B_j is overwritten by the solution vectors (and the residuals in the overdetermined cases) stored as columns. |
| [in] | ldb | int. ldb >= max(m,n). Specifies the leading dimension of matrices B_j. |
| [in] | strideB | hipblasStride. Stride from the start of one matrix B_j to the next one B_(j+1). There is no restriction for the value of strideB. Normal use case is strideB >= ldb*nrhs |
| [out] | info | pointer to an int on the host. If info = 0, successful exit. If info = j < 0, the argument at position -j is invalid. |
| [out] | deviceInfo | pointer to int. Array of batchCount integers on the GPU. If deviceInfo[j] = 0, successful exit for solution of A_j. If deviceInfo[j] = i > 0, the solution of A_j could not be computed because input matrix A_j is rank deficient; the i-th diagonal element of its triangular factor is zero. |
| [in] | batchCount | int. batchCount >= 0. Number of matrices in the batch. |
| hipblasStatus_t hipblasSgemm | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | transA, | ||
| hipblasOperation_t | transB, | ||
| int | m, | ||
| int | n, | ||
| int | k, | ||
| const float * | alpha, | ||
| const float * | AP, | ||
| int | lda, | ||
| const float * | BP, | ||
| int | ldb, | ||
| const float * | beta, | ||
| float * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
gemm performs one of the matrix-matrix operations
C = alpha*op( A )*op( B ) + beta*C,
where op( X ) is one of
op( X ) = X or op( X ) = X**T or op( X ) = X**H,
alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
| [in] | handle | [hipblasHandle_t] . |
| [in] | transA | [hipblasOperation_t] specifies the form of op( A ) |
| [in] | transB | [hipblasOperation_t] specifies the form of op( B ) |
| [in] | m | [int] number or rows of matrices op( A ) and C |
| [in] | n | [int] number of columns of matrices op( B ) and C |
| [in] | k | [int] number of columns of matrix op( A ) and number of rows of matrix op( B ) |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. |
| [in] | AP | device pointer storing matrix A. |
| [in] | lda | [int] specifies the leading dimension of A. |
| [in] | BP | device pointer storing matrix B. |
| [in] | ldb | [int] specifies the leading dimension of B. |
| [in] | beta | device pointer or host pointer specifying the scalar beta. |
| [in,out] | CP | device pointer storing matrix C on the GPU. |
| [in] | ldc | [int] specifies the leading dimension of C. |
| hipblasStatus_t hipblasSgemmBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | transA, | ||
| hipblasOperation_t | transB, | ||
| int | m, | ||
| int | n, | ||
| int | k, | ||
| const float * | alpha, | ||
| const float *const | AP[], | ||
| int | lda, | ||
| const float *const | BP[], | ||
| int | ldb, | ||
| const float * | beta, | ||
| float *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
gemmBatched performs one of the batched matrix-matrix operations C_i = alpha*op( A_i )*op( B_i ) + beta*C_i, for i = 1, ..., batchCount. where op( X ) is one of op( X ) = X or op( X ) = X**T or op( X ) = X**H, alpha and beta are scalars, and A, B and C are strided batched matrices, with op( A ) an m by k by batchCount strided_batched matrix, op( B ) an k by n by batchCount strided_batched matrix and C an m by n by batchCount strided_batched matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | transA | [hipblasOperation_t] specifies the form of op( A ) |
| [in] | transB | [hipblasOperation_t] specifies the form of op( B ) |
| [in] | m | [int] matrix dimention m. |
| [in] | n | [int] matrix dimention n. |
| [in] | k | [int] matrix dimention k. |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. |
| [in] | AP | device array of device pointers storing each matrix A_i. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | BP | device array of device pointers storing each matrix B_i. |
| [in] | ldb | [int] specifies the leading dimension of each B_i. |
| [in] | beta | device pointer or host pointer specifying the scalar beta. |
| [in,out] | CP | device array of device pointers storing each matrix C_i. |
| [in] | ldc | [int] specifies the leading dimension of each C_i. |
| [in] | batchCount | [int] number of gemm operations in the batch |
| hipblasStatus_t hipblasSgemmStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | transA, | ||
| hipblasOperation_t | transB, | ||
| int | m, | ||
| int | n, | ||
| int | k, | ||
| const float * | alpha, | ||
| const float * | AP, | ||
| int | lda, | ||
| long long | strideA, | ||
| const float * | BP, | ||
| int | ldb, | ||
| long long | strideB, | ||
| const float * | beta, | ||
| float * | CP, | ||
| int | ldc, | ||
| long long | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
gemmStridedBatched performs one of the strided batched matrix-matrix operations
C_i = alpha*op( A_i )*op( B_i ) + beta*C_i, for i = 1, ..., batchCount.
where op( X ) is one of
op( X ) = X or op( X ) = X**T or op( X ) = X**H,
alpha and beta are scalars, and A, B and C are strided batched matrices, with op( A ) an m by k by batchCount strided_batched matrix, op( B ) an k by n by batchCount strided_batched matrix and C an m by n by batchCount strided_batched matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | transA | [hipblasOperation_t] specifies the form of op( A ) |
| [in] | transB | [hipblasOperation_t] specifies the form of op( B ) |
| [in] | m | [int] matrix dimention m. |
| [in] | n | [int] matrix dimention n. |
| [in] | k | [int] matrix dimention k. |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. |
| [in] | AP | device pointer pointing to the first matrix A_1. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | strideA | [hipblasStride] stride from the start of one A_i matrix to the next A_(i + 1). |
| [in] | BP | device pointer pointing to the first matrix B_1. |
| [in] | ldb | [int] specifies the leading dimension of each B_i. |
| [in] | strideB | [hipblasStride] stride from the start of one B_i matrix to the next B_(i + 1). |
| [in] | beta | device pointer or host pointer specifying the scalar beta. |
| [in,out] | CP | device pointer pointing to the first matrix C_1. |
| [in] | ldc | [int] specifies the leading dimension of each C_i. |
| [in] | strideC | [hipblasStride] stride from the start of one C_i matrix to the next C_(i + 1). |
| [in] | batchCount | [int] number of gemm operatons in the batch |
| hipblasStatus_t hipblasSgemv | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | trans, | ||
| int | m, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float * | AP, | ||
| int | lda, | ||
| const float * | x, | ||
| int | incx, | ||
| const float * | beta, | ||
| float * | y, | ||
| int | incy | ||
| ) |
BLAS Level 2 API.
gemv performs one of the matrix-vector operations
y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y, or y := alpha*A**H*x + beta*y,
where alpha and beta are scalars, x and y are vectors and A is an m by n matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | trans | [hipblasOperation_t] indicates whether matrix A is tranposed (conjugated) or not |
| [in] | m | [int] number of rows of matrix A |
| [in] | n | [int] number of columns of matrix A |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device pointer storing matrix A. |
| [in] | lda | [int] specifies the leading dimension of A. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| hipblasStatus_t hipblasSgemvBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | trans, | ||
| int | m, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float *const | AP[], | ||
| int | lda, | ||
| const float *const | x[], | ||
| int | incx, | ||
| const float * | beta, | ||
| float *const | y[], | ||
| int | incy, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
gemvBatched performs a batch of matrix-vector operations
y_i := alpha*A_i*x_i + beta*y_i, or y_i := alpha*A_i**T*x_i + beta*y_i, or y_i := alpha*A_i**H*x_i + beta*y_i,
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha and beta are scalars, x_i and y_i are vectors and A_i is an m by n matrix, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | trans | [hipblasOperation_t] indicates whether matrices A_i are tranposed (conjugated) or not |
| [in] | m | [int] number of rows of each matrix A_i |
| [in] | n | [int] number of columns of each matrix A_i |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device array of device pointers storing each matrix A_i. |
| [in] | lda | [int] specifies the leading dimension of each matrix A_i. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each vector x_i. |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each vector y_i. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasSgemvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | transA, | ||
| int | m, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const float * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const float * | beta, | ||
| float * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
gemvStridedBatched performs a batch of matrix-vector operations
y_i := alpha*A_i*x_i + beta*y_i, or y_i := alpha*A_i**T*x_i + beta*y_i, or y_i := alpha*A_i**H*x_i + beta*y_i,
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha and beta are scalars, x_i and y_i are vectors and A_i is an m by n matrix, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | transA | [hipblasOperation_t] indicates whether matrices A_i are tranposed (conjugated) or not |
| [in] | m | [int] number of rows of matrices A_i |
| [in] | n | [int] number of columns of matrices A_i |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device pointer to the first matrix (A_1) in the batch. |
| [in] | lda | [int] specifies the leading dimension of matrices A_i. |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | x | device pointer to the first vector (x_1) in the batch. |
| [in] | incx | [int] specifies the increment for the elements of vectors x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stridex, however the user should take care to ensure that stridex is of appropriate size. When trans equals HIPBLAS_OP_N this typically means stridex >= n * incx, otherwise stridex >= m * incx. |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device pointer to the first vector (y_1) in the batch. |
| [in] | incy | [int] specifies the increment for the elements of vectors y_i. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1). There are no restrictions placed on stridey, however the user should take care to ensure that stridey is of appropriate size. When trans equals HIPBLAS_OP_N this typically means stridey >= m * incy, otherwise stridey >= n * incy. stridey should be non zero. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasSgeqrf | ( | hipblasHandle_t | handle, |
| const int | m, | ||
| const int | n, | ||
| float * | A, | ||
| const int | lda, | ||
| float * | ipiv, | ||
| int * | info | ||
| ) |
SOLVER API.
geqrf computes a QR factorization of a general m-by-n matrix A.
The factorization has the form
\[ A = Q\left[\begin{array}{c} R\\ 0 \end{array}\right] \]
where R is upper triangular (upper trapezoidal if m < n), and Q is a m-by-m orthogonal/unitary matrix represented as the product of Householder matrices
\[ Q = H_1H_2\cdots H_k, \quad \text{with} \: k = \text{min}(m,n) \]
Each Householder matrix \(H_i\) is given by
\[ H_i = I - \text{ipiv}[i] \cdot v_i v_i' \]
where the first i-1 elements of the Householder vector \(v_i\) are zero, and \(v_i[i] = 1\).
| [in] | handle | hipblasHandle_t. |
| [in] | m | int. m >= 0. The number of rows of the matrix A. |
| [in] | n | int. n >= 0. The number of columns of the matrix A. |
| [in,out] | A | pointer to type. Array on the GPU of dimension lda*n. On entry, the m-by-n matrix to be factored. On exit, the elements on and above the diagonal contain the factor R; the elements below the diagonal are the last m - i elements of Householder vector v_i. |
| [in] | lda | int. lda >= m. Specifies the leading dimension of A. |
| [out] | ipiv | pointer to type. Array on the GPU of dimension min(m,n). The Householder scalars. |
| [out] | info | pointer to a int on the host. If info = 0, successful exit. If info = j < 0, the argument at position -j is invalid. |
| hipblasStatus_t hipblasSgeqrfBatched | ( | hipblasHandle_t | handle, |
| const int | m, | ||
| const int | n, | ||
| float *const | A[], | ||
| const int | lda, | ||
| float *const | ipiv[], | ||
| int * | info, | ||
| const int | batchCount | ||
| ) |
SOLVER API.
geqrfBatched computes the QR factorization of a batch of general m-by-n matrices.
The factorization of matrix \(A_i\) in the batch has the form
\[ A_i = Q_i\left[\begin{array}{c} R_i\\ 0 \end{array}\right] \]
where \(R_i\) is upper triangular (upper trapezoidal if m < n), and \(Q_i\) is a m-by-m orthogonal/unitary matrix represented as the product of Householder matrices
\[ Q_i = H_{i_1}H_{i_2}\cdots H_{i_k}, \quad \text{with} \: k = \text{min}(m,n) \]
Each Householder matrix \(H_{i_j}\) is given by
\[ H_{i_j} = I - \text{ipiv}_i[j] \cdot v_{i_j} v_{i_j}' \]
where the first j-1 elements of Householder vector \(v_{i_j}\) are zero, and \(v_{i_j}[j] = 1\).
| [in] | handle | hipblasHandle_t. |
| [in] | m | int. m >= 0. The number of rows of all the matrices A_i in the batch. |
| [in] | n | int. n >= 0. The number of columns of all the matrices A_i in the batch. |
| [in,out] | A | Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n. On entry, the m-by-n matrices A_i to be factored. On exit, the elements on and above the diagonal contain the factor R_i. The elements below the diagonal are the last m - j elements of Householder vector v_(i_j). |
| [in] | lda | int. lda >= m. Specifies the leading dimension of matrices A_i. |
| [out] | ipiv | array of pointers to type. Each pointer points to an array on the GPU of dimension min(m, n). Contains the vectors ipiv_i of corresponding Householder scalars. |
| [out] | info | pointer to a int on the host. If info = 0, successful exit. If info = j < 0, the argument at position -j is invalid. |
| [in] | batchCount | int. batchCount >= 0. Number of matrices in the batch. |
| hipblasStatus_t hipblasSgeqrfStridedBatched | ( | hipblasHandle_t | handle, |
| const int | m, | ||
| const int | n, | ||
| float * | A, | ||
| const int | lda, | ||
| const hipblasStride | strideA, | ||
| float * | ipiv, | ||
| const hipblasStride | strideP, | ||
| int * | info, | ||
| const int | batchCount | ||
| ) |
SOLVER API.
geqrfStridedBatched computes the QR factorization of a batch of general m-by-n matrices.
The factorization of matrix \(A_i\) in the batch has the form
\[ A_i = Q_i\left[\begin{array}{c} R_i\\ 0 \end{array}\right] \]
where \(R_i\) is upper triangular (upper trapezoidal if m < n), and \(Q_i\) is a m-by-m orthogonal/unitary matrix represented as the product of Householder matrices
\[ Q_i = H_{i_1}H_{i_2}\cdots H_{i_k}, \quad \text{with} \: k = \text{min}(m,n) \]
Each Householder matrix \(H_{i_j}\) is given by
\[ H_{i_j} = I - \text{ipiv}_j[j] \cdot v_{i_j} v_{i_j}' \]
where the first j-1 elements of Householder vector \(v_{i_j}\) are zero, and \(v_{i_j}[j] = 1\).
| [in] | handle | hipblasHandle_t. |
| [in] | m | int. m >= 0. The number of rows of all the matrices A_i in the batch. |
| [in] | n | int. n >= 0. The number of columns of all the matrices A_i in the batch. |
| [in,out] | A | pointer to type. Array on the GPU (the size depends on the value of strideA). On entry, the m-by-n matrices A_i to be factored. On exit, the elements on and above the diagonal contain the factor R_i. The elements below the diagonal are the last m - j elements of Householder vector v_(i_j). |
| [in] | lda | int. lda >= m. Specifies the leading dimension of matrices A_i. |
| [in] | strideA | hipblasStride. Stride from the start of one matrix A_i to the next one A_(i+1). There is no restriction for the value of strideA. Normal use case is strideA >= lda*n. |
| [out] | ipiv | pointer to type. Array on the GPU (the size depends on the value of strideP). Contains the vectors ipiv_i of corresponding Householder scalars. |
| [in] | strideP | hipblasStride. Stride from the start of one vector ipiv_i to the next one ipiv_(i+1). There is no restriction for the value of strideP. Normal use is strideP >= min(m,n). |
| [out] | info | pointer to a int on the host. If info = 0, successful exit. If info = j < 0, the argument at position -j is invalid. |
| [in] | batchCount | int. batchCount >= 0. Number of matrices in the batch. |
| hipblasStatus_t hipblasSger | ( | hipblasHandle_t | handle, |
| int | m, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float * | x, | ||
| int | incx, | ||
| const float * | y, | ||
| int | incy, | ||
| float * | AP, | ||
| int | lda | ||
| ) |
BLAS Level 2 API.
ger,geru,gerc performs the matrix-vector operations
A := A + alpha*x*y**T , OR A := A + alpha*x*y**H for gerc
where alpha is a scalar, x and y are vectors, and A is an m by n matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | m | [int] the number of rows of the matrix A. |
| [in] | n | [int] the number of columns of the matrix A. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in,out] | AP | device pointer storing matrix A. |
| [in] | lda | [int] specifies the leading dimension of A. |
| hipblasStatus_t hipblasSgerBatched | ( | hipblasHandle_t | handle, |
| int | m, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float *const | x[], | ||
| int | incx, | ||
| const float *const | y[], | ||
| int | incy, | ||
| float *const | AP[], | ||
| int | lda, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
gerBatched,geruBatched,gercBatched performs a batch of the matrix-vector operations
A := A + alpha*x*y**T , OR A := A + alpha*x*y**H for gerc
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha is a scalar, x_i and y_i are vectors and A_i is an m by n matrix, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | m | [int] the number of rows of each matrix A_i. |
| [in] | n | [int] the number of columns of eaceh matrix A_i. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each vector x_i. |
| [in] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each vector y_i. |
| [in,out] | AP | device array of device pointers storing each matrix A_i. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasSgerStridedBatched | ( | hipblasHandle_t | handle, |
| int | m, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const float * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| float * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
gerStridedBatched,geruStridedBatched,gercStridedBatched performs the matrix-vector operations
A_i := A_i + alpha*x_i*y_i**T, OR A_i := A_i + alpha*x_i*y_i**H for gerc
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha is a scalar, x_i and y_i are vectors and A_i is an m by n matrix, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | m | [int] the number of rows of each matrix A_i. |
| [in] | n | [int] the number of columns of each matrix A_i. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer to the first vector (x_1) in the batch. |
| [in] | incx | [int] specifies the increments for the elements of each vector x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stridex, however the user should take care to ensure that stridex is of appropriate size, for a typical case this means stridex >= m * incx. |
| [in,out] | y | device pointer to the first vector (y_1) in the batch. |
| [in] | incy | [int] specifies the increment for the elements of each vector y_i. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1). There are no restrictions placed on stridey, however the user should take care to ensure that stridey is of appropriate size, for a typical case this means stridey >= n * incy. |
| [in,out] | AP | device pointer to the first matrix (A_1) in the batch. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasSgetrf | ( | hipblasHandle_t | handle, |
| const int | n, | ||
| float * | A, | ||
| const int | lda, | ||
| int * | ipiv, | ||
| int * | info | ||
| ) |
SOLVER API.
getrf computes the LU factorization of a general n-by-n matrix A using partial pivoting with row interchanges. The LU factorization can be done without pivoting if ipiv is passed as a nullptr.
In the case that ipiv is not null, the factorization has the form:
\[ A = PLU \]
where P is a permutation matrix, L is lower triangular with unit diagonal elements, and U is upper triangular.
In the case that ipiv is null, the factorization is done without pivoting:
\[ A = LU \]
| [in] | handle | hipblasHandle_t. |
| [in] | n | int. n >= 0. The number of columns and rows of the matrix A. |
| [in,out] | A | pointer to type. Array on the GPU of dimension lda*n. On entry, the n-by-n matrix A to be factored. On exit, the factors L and U from the factorization. The unit diagonal elements of L are not stored. |
| [in] | lda | int. lda >= n. Specifies the leading dimension of A. |
| [out] | ipiv | pointer to int. Array on the GPU of dimension n. The vector of pivot indices. Elements of ipiv are 1-based indices. For 1 <= i <= n, the row i of the matrix was interchanged with row ipiv[i]. Matrix P of the factorization can be derived from ipiv. The factorization here can be done without pivoting if ipiv is passed in as a nullptr. |
| [out] | info | pointer to a int on the GPU. If info = 0, successful exit. If info = j > 0, U is singular. U[j,j] is the first zero pivot. |
| hipblasStatus_t hipblasSgetrfBatched | ( | hipblasHandle_t | handle, |
| const int | n, | ||
| float *const | A[], | ||
| const int | lda, | ||
| int * | ipiv, | ||
| int * | info, | ||
| const int | batchCount | ||
| ) |
SOLVER API.
getrfBatched computes the LU factorization of a batch of general n-by-n matrices using partial pivoting with row interchanges. The LU factorization can be done without pivoting if ipiv is passed as a nullptr.
In the case that ipiv is not null, the factorization of matrix \(A_i\) in the batch has the form:
\[ A_i = P_iL_iU_i \]
where \(P_i\) is a permutation matrix, \(L_i\) is lower triangular with unit diagonal elements, and \(U_i\) is upper triangular.
In the case that ipiv is null, the factorization is done without pivoting:
\[ A_i = L_iU_i \]
| [in] | handle | hipblasHandle_t. |
| [in] | n | int. n >= 0. The number of columns and rows of all matrices A_i in the batch. |
| [in,out] | A | array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n. On entry, the n-by-n matrices A_i to be factored. On exit, the factors L_i and U_i from the factorizations. The unit diagonal elements of L_i are not stored. |
| [in] | lda | int. lda >= n. Specifies the leading dimension of matrices A_i. |
| [out] | ipiv | pointer to int. Array on the GPU. Contains the vectors of pivot indices ipiv_i (corresponding to A_i). Dimension of ipiv_i is n. Elements of ipiv_i are 1-based indices. For each instance A_i in the batch and for 1 <= j <= n, the row j of the matrix A_i was interchanged with row ipiv_i[j]. Matrix P_i of the factorization can be derived from ipiv_i. The factorization here can be done without pivoting if ipiv is passed in as a nullptr. |
| [out] | info | pointer to int. Array of batchCount integers on the GPU. If info[i] = 0, successful exit for factorization of A_i. If info[i] = j > 0, U_i is singular. U_i[j,j] is the first zero pivot. |
| [in] | batchCount | int. batchCount >= 0. Number of matrices in the batch. |
| hipblasStatus_t hipblasSgetrfStridedBatched | ( | hipblasHandle_t | handle, |
| const int | n, | ||
| float * | A, | ||
| const int | lda, | ||
| const hipblasStride | strideA, | ||
| int * | ipiv, | ||
| const hipblasStride | strideP, | ||
| int * | info, | ||
| const int | batchCount | ||
| ) |
SOLVER API.
getrfStridedBatched computes the LU factorization of a batch of general n-by-n matrices using partial pivoting with row interchanges. The LU factorization can be done without pivoting if ipiv is passed as a nullptr.
In the case that ipiv is not null, the factorization of matrix \(A_i\) in the batch has the form:
\[ A_i = P_iL_iU_i \]
where \(P_i\) is a permutation matrix, \(L_i\) is lower triangular with unit diagonal elements, and \(U_i\) is upper triangular.
In the case that ipiv is null, the factorization is done without pivoting:
\[ A_i = L_iU_i \]
| [in] | handle | hipblasHandle_t. |
| [in] | n | int. n >= 0. The number of columns and rows of all matrices A_i in the batch. |
| [in,out] | A | pointer to type. Array on the GPU (the size depends on the value of strideA). On entry, the n-by-n matrices A_i to be factored. On exit, the factors L_i and U_i from the factorization. The unit diagonal elements of L_i are not stored. |
| [in] | lda | int. lda >= n. Specifies the leading dimension of matrices A_i. |
| [in] | strideA | hipblasStride. Stride from the start of one matrix A_i to the next one A_(i+1). There is no restriction for the value of strideA. Normal use case is strideA >= lda*n |
| [out] | ipiv | pointer to int. Array on the GPU (the size depends on the value of strideP). Contains the vectors of pivots indices ipiv_i (corresponding to A_i). Dimension of ipiv_i is n. Elements of ipiv_i are 1-based indices. For each instance A_i in the batch and for 1 <= j <= n, the row j of the matrix A_i was interchanged with row ipiv_i[j]. Matrix P_i of the factorization can be derived from ipiv_i. The factorization here can be done without pivoting if ipiv is passed in as a nullptr. |
| [in] | strideP | hipblasStride. Stride from the start of one vector ipiv_i to the next one ipiv_(i+1). There is no restriction for the value of strideP. Normal use case is strideP >= n. |
| [out] | info | pointer to int. Array of batchCount integers on the GPU. If info[i] = 0, successful exit for factorization of A_i. If info[i] = j > 0, U_i is singular. U_i[j,j] is the first zero pivot. |
| [in] | batchCount | int. batchCount >= 0. Number of matrices in the batch. |
| hipblasStatus_t hipblasSgetriBatched | ( | hipblasHandle_t | handle, |
| const int | n, | ||
| float *const | A[], | ||
| const int | lda, | ||
| int * | ipiv, | ||
| float *const | C[], | ||
| const int | ldc, | ||
| int * | info, | ||
| const int | batchCount | ||
| ) |
SOLVER API.
getriBatched computes the inverse \(C_i = A_i^{-1}\) of a batch of general n-by-n matrices \(A_i\).
The inverse is computed by solving the linear system
\[ A_i C_i = I \]
where I is the identity matrix, and \(A_i\) is factorized as \(A_i = P_i L_i U_i\) as given by getrfBatched.
| [in] | handle | hipblasHandle_t. |
| [in] | n | int. n >= 0. The number of rows and columns of all matrices A_i in the batch. |
| [in] | A | array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n. The factors L_i and U_i of the factorization A_i = P_i*L_i*U_i returned by getrfBatched. |
| [in] | lda | int. lda >= n. Specifies the leading dimension of matrices A_i. |
| [in] | ipiv | pointer to int. Array on the GPU (the size depends on the value of strideP). The pivot indices returned by getrfBatched. ipiv can be passed in as a nullptr, this will assume that getrfBatched was called without partial pivoting. |
| [out] | C | array of pointers to type. Each pointer points to an array on the GPU of dimension ldc*n. If info[i] = 0, the inverse of matrices A_i. Otherwise, undefined. |
| [in] | ldc | int. ldc >= n. Specifies the leading dimension of C_i. |
| [out] | info | pointer to int. Array of batchCount integers on the GPU. If info[i] = 0, successful exit for inversion of A_i. If info[i] = j > 0, U_i is singular. U_i[j,j] is the first zero pivot. |
| [in] | batchCount | int. batchCount >= 0. Number of matrices in the batch. |
| hipblasStatus_t hipblasSgetrs | ( | hipblasHandle_t | handle, |
| const hipblasOperation_t | trans, | ||
| const int | n, | ||
| const int | nrhs, | ||
| float * | A, | ||
| const int | lda, | ||
| const int * | ipiv, | ||
| float * | B, | ||
| const int | ldb, | ||
| int * | info | ||
| ) |
SOLVER API.
getrs solves a system of n linear equations on n variables in its factorized form.
It solves one of the following systems, depending on the value of trans:
\[ \begin{array}{cl} A X = B & \: \text{not transposed,}\\ A^T X = B & \: \text{transposed, or}\\ A^H X = B & \: \text{conjugate transposed.} \end{array} \]
Matrix A is defined by its triangular factors as returned by getrf.
| [in] | handle | hipblasHandle_t. |
| [in] | trans | hipblasOperation_t. Specifies the form of the system of equations. |
| [in] | n | int. n >= 0. The order of the system, i.e. the number of columns and rows of A. |
| [in] | nrhs | int. nrhs >= 0. The number of right hand sides, i.e., the number of columns of the matrix B. |
| [in] | A | pointer to type. Array on the GPU of dimension lda*n. The factors L and U of the factorization A = P*L*U returned by getrf. |
| [in] | lda | int. lda >= n. The leading dimension of A. |
| [in] | ipiv | pointer to int. Array on the GPU of dimension n. The pivot indices returned by getrf. |
| [in,out] | B | pointer to type. Array on the GPU of dimension ldb*nrhs. On entry, the right hand side matrix B. On exit, the solution matrix X. |
| [in] | ldb | int. ldb >= n. The leading dimension of B. |
| [out] | info | pointer to a int on the host. If info = 0, successful exit. If info = j < 0, the argument at position -j is invalid. |
| hipblasStatus_t hipblasSgetrsBatched | ( | hipblasHandle_t | handle, |
| const hipblasOperation_t | trans, | ||
| const int | n, | ||
| const int | nrhs, | ||
| float *const | A[], | ||
| const int | lda, | ||
| const int * | ipiv, | ||
| float *const | B[], | ||
| const int | ldb, | ||
| int * | info, | ||
| const int | batchCount | ||
| ) |
SOLVER API.
getrsBatched solves a batch of systems of n linear equations on n variables in its factorized forms.
For each instance i in the batch, it solves one of the following systems, depending on the value of trans:
\[ \begin{array}{cl} A_i X_i = B_i & \: \text{not transposed,}\\ A_i^T X_i = B_i & \: \text{transposed, or}\\ A_i^H X_i = B_i & \: \text{conjugate transposed.} \end{array} \]
Matrix \(A_i\) is defined by its triangular factors as returned by getrfBatched.
| [in] | handle | hipblasHandle_t. |
| [in] | trans | hipblasOperation_t. Specifies the form of the system of equations of each instance in the batch. |
| [in] | n | int. n >= 0. The order of the system, i.e. the number of columns and rows of all A_i matrices. |
| [in] | nrhs | int. nrhs >= 0. The number of right hand sides, i.e., the number of columns of all the matrices B_i. |
| [in] | A | Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n. The factors L_i and U_i of the factorization A_i = P_i*L_i*U_i returned by getrfBatched. |
| [in] | lda | int. lda >= n. The leading dimension of matrices A_i. |
| [in] | ipiv | pointer to int. Array on the GPU. Contains the vectors ipiv_i of pivot indices returned by getrfBatched. |
| [in,out] | B | Array of pointers to type. Each pointer points to an array on the GPU of dimension ldb*nrhs. On entry, the right hand side matrices B_i. On exit, the solution matrix X_i of each system in the batch. |
| [in] | ldb | int. ldb >= n. The leading dimension of matrices B_i. |
| [out] | info | pointer to a int on the host. If info = 0, successful exit. If info = j < 0, the argument at position -j is invalid. |
| [in] | batchCount | int. batchCount >= 0. Number of instances (systems) in the batch. |
| hipblasStatus_t hipblasSgetrsStridedBatched | ( | hipblasHandle_t | handle, |
| const hipblasOperation_t | trans, | ||
| const int | n, | ||
| const int | nrhs, | ||
| float * | A, | ||
| const int | lda, | ||
| const hipblasStride | strideA, | ||
| const int * | ipiv, | ||
| const hipblasStride | strideP, | ||
| float * | B, | ||
| const int | ldb, | ||
| const hipblasStride | strideB, | ||
| int * | info, | ||
| const int | batchCount | ||
| ) |
SOLVER API.
getrsStridedBatched solves a batch of systems of n linear equations on n variables in its factorized forms.
For each instance i in the batch, it solves one of the following systems, depending on the value of trans:
\[ \begin{array}{cl} A_i X_i = B_i & \: \text{not transposed,}\\ A_i^T X_i = B_i & \: \text{transposed, or}\\ A_i^H X_i = B_i & \: \text{conjugate transposed.} \end{array} \]
Matrix \(A_i\) is defined by its triangular factors as returned by getrfStridedBatched.
| [in] | handle | hipblasHandle_t. |
| [in] | trans | hipblasOperation_t. Specifies the form of the system of equations of each instance in the batch. |
| [in] | n | int. n >= 0. The order of the system, i.e. the number of columns and rows of all A_i matrices. |
| [in] | nrhs | int. nrhs >= 0. The number of right hand sides, i.e., the number of columns of all the matrices B_i. |
| [in] | A | pointer to type. Array on the GPU (the size depends on the value of strideA). The factors L_i and U_i of the factorization A_i = P_i*L_i*U_i returned by getrfStridedBatched. |
| [in] | lda | int. lda >= n. The leading dimension of matrices A_i. |
| [in] | strideA | hipblasStride. Stride from the start of one matrix A_i to the next one A_(i+1). There is no restriction for the value of strideA. Normal use case is strideA >= lda*n. |
| [in] | ipiv | pointer to int. Array on the GPU (the size depends on the value of strideP). Contains the vectors ipiv_i of pivot indices returned by getrfStridedBatched. |
| [in] | strideP | hipblasStride. Stride from the start of one vector ipiv_i to the next one ipiv_(i+1). There is no restriction for the value of strideP. Normal use case is strideP >= n. |
| [in,out] | B | pointer to type. Array on the GPU (size depends on the value of strideB). On entry, the right hand side matrices B_i. On exit, the solution matrix X_i of each system in the batch. |
| [in] | ldb | int. ldb >= n. The leading dimension of matrices B_i. |
| [in] | strideB | hipblasStride. Stride from the start of one matrix B_i to the next one B_(i+1). There is no restriction for the value of strideB. Normal use case is strideB >= ldb*nrhs. |
| [out] | info | pointer to a int on the host. If info = 0, successful exit. If info = j < 0, the argument at position -j is invalid. |
| [in] | batchCount | int. batchCount >= 0. Number of instances (systems) in the batch. |
| hipblasStatus_t hipblasSnrm2 | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const float * | x, | ||
| int | incx, | ||
| float * | result | ||
| ) |
BLAS Level 1 API.
nrm2 computes the euclidean norm of a real or complex vector
result := sqrt( x'*x ) for real vectors
result := sqrt( x**H*x ) for complex vectors
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of y. |
| [in,out] | result | device pointer or host pointer to store the nrm2 product. return is 0.0 if n, incx<=0. |
| hipblasStatus_t hipblasSnrm2Batched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const float *const | x[], | ||
| int | incx, | ||
| int | batchCount, | ||
| float * | result | ||
| ) |
BLAS Level 1 API.
nrm2Batched computes the euclidean norm over a batch of real or complex vectors
result := sqrt( x_i'*x_i ) for real vectors x, for i = 1, ..., batchCount
result := sqrt( x_i**H*x_i ) for complex vectors x, for i = 1, ..., batchCount
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each x_i. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. incx must be > 0. |
| [in] | batchCount | [int] number of instances in the batch |
| [out] | result | device pointer or host pointer to array of batchCount size for nrm2 results. return is 0.0 for each element if n <= 0, incx<=0. |
| hipblasStatus_t hipblasSnrm2StridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const float * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount, | ||
| float * | result | ||
| ) |
BLAS Level 1 API.
nrm2StridedBatched computes the euclidean norm over a batch of real or complex vectors
:= sqrt( x_i'*x_i ) for real vectors x, for i = 1, ..., batchCount
:= sqrt( x_i**H*x_i ) for complex vectors, for i = 1, ..., batchCount
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each x_i. |
| [in] | x | device pointer to the first vector x_1. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. incx must be > 0. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stride_x, however the user should take care to ensure that stride_x is of appropriate size, for a typical case this means stride_x >= n * incx. |
| [in] | batchCount | [int] number of instances in the batch |
| [out] | result | device pointer or host pointer to array for storing contiguous batchCount results. return is 0.0 for each element if n <= 0, incx<=0. |
| hipblasStatus_t hipblasSrot | ( | hipblasHandle_t | handle, |
| int | n, | ||
| float * | x, | ||
| int | incx, | ||
| float * | y, | ||
| int | incy, | ||
| const float * | c, | ||
| const float * | s | ||
| ) |
BLAS Level 1 API.
rot applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to vectors x and y. Scalars c and s may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in the x and y vectors. |
| [in,out] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment between elements of x. |
| [in,out] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment between elements of y. |
| [in] | c | device pointer or host pointer storing scalar cosine component of the rotation matrix. |
| [in] | s | device pointer or host pointer storing scalar sine component of the rotation matrix. |
| hipblasStatus_t hipblasSrotBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| float *const | x[], | ||
| int | incx, | ||
| float *const | y[], | ||
| int | incy, | ||
| const float * | c, | ||
| const float * | s, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
rotBatched applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to batched vectors x_i and y_i, for i = 1, ..., batchCount. Scalars c and s may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each x_i and y_i vectors. |
| [in,out] | x | device array of deivce pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment between elements of each x_i. |
| [in,out] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment between elements of each y_i. |
| [in] | c | device pointer or host pointer to scalar cosine component of the rotation matrix. |
| [in] | s | device pointer or host pointer to scalar sine component of the rotation matrix. |
| [in] | batchCount | [int] the number of x and y arrays, i.e. the number of batches. |
| hipblasStatus_t hipblasSrotg | ( | hipblasHandle_t | handle, |
| float * | a, | ||
| float * | b, | ||
| float * | c, | ||
| float * | s | ||
| ) |
BLAS Level 1 API.
rotg creates the Givens rotation matrix for the vector (a b). Scalars c and s and arrays a and b may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode. If the pointer mode is set to HIPBLAS_POINTER_MODE_HOST, this function blocks the CPU until the GPU has finished and the results are available in host memory. If the pointer mode is set to HIPBLAS_POINTER_MODE_DEVICE, this function returns immediately and synchronization is required to read the results.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in,out] | a | device pointer or host pointer to input vector element, overwritten with r. |
| [in,out] | b | device pointer or host pointer to input vector element, overwritten with z. |
| [in,out] | c | device pointer or host pointer to cosine element of Givens rotation. |
| [in,out] | s | device pointer or host pointer sine element of Givens rotation. |
| hipblasStatus_t hipblasSrotgBatched | ( | hipblasHandle_t | handle, |
| float *const | a[], | ||
| float *const | b[], | ||
| float *const | c[], | ||
| float *const | s[], | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
rotgBatched creates the Givens rotation matrix for the batched vectors (a_i b_i), for i = 1, ..., batchCount. a, b, c, and s may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode. If the pointer mode is set to HIPBLAS_POINTER_MODE_HOST, this function blocks the CPU until the GPU has finished and the results are available in host memory. If the pointer mode is set to HIPBLAS_POINTER_MODE_DEVICE, this function returns immediately and synchronization is required to read the results.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in,out] | a | device array of device pointers storing each single input vector element a_i, overwritten with r_i. |
| [in,out] | b | device array of device pointers storing each single input vector element b_i, overwritten with z_i. |
| [in,out] | c | device array of device pointers storing each cosine element of Givens rotation for the batch. |
| [in,out] | s | device array of device pointers storing each sine element of Givens rotation for the batch. |
| [in] | batchCount | [int] number of batches (length of arrays a, b, c, and s). |
| hipblasStatus_t hipblasSrotgStridedBatched | ( | hipblasHandle_t | handle, |
| float * | a, | ||
| hipblasStride | stridea, | ||
| float * | b, | ||
| hipblasStride | strideb, | ||
| float * | c, | ||
| hipblasStride | stridec, | ||
| float * | s, | ||
| hipblasStride | strides, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
rotgStridedBatched creates the Givens rotation matrix for the strided batched vectors (a_i b_i), for i = 1, ..., batchCount. a, b, c, and s may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode. If the pointer mode is set to HIPBLAS_POINTER_MODE_HOST, this function blocks the CPU until the GPU has finished and the results are available in host memory. If the pointer mode is set to HIPBLAS_POINTER_MODE_HOST, this function returns immediately and synchronization is required to read the results.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in,out] | a | device strided_batched pointer or host strided_batched pointer to first single input vector element a_1, overwritten with r. |
| [in] | stridea | [hipblasStride] distance between elements of a in batch (distance between a_i and a_(i + 1)) |
| [in,out] | b | device strided_batched pointer or host strided_batched pointer to first single input vector element b_1, overwritten with z. |
| [in] | strideb | [hipblasStride] distance between elements of b in batch (distance between b_i and b_(i + 1)) |
| [in,out] | c | device strided_batched pointer or host strided_batched pointer to first cosine element of Givens rotations c_1. |
| [in] | stridec | [hipblasStride] distance between elements of c in batch (distance between c_i and c_(i + 1)) |
| [in,out] | s | device strided_batched pointer or host strided_batched pointer to sine element of Givens rotations s_1. |
| [in] | strides | [hipblasStride] distance between elements of s in batch (distance between s_i and s_(i + 1)) |
| [in] | batchCount | [int] number of batches (length of arrays a, b, c, and s). |
| hipblasStatus_t hipblasSrotm | ( | hipblasHandle_t | handle, |
| int | n, | ||
| float * | x, | ||
| int | incx, | ||
| float * | y, | ||
| int | incy, | ||
| const float * | param | ||
| ) |
BLAS Level 1 API.
rotm applies the modified Givens rotation matrix defined by param to vectors x and y.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in the x and y vectors. |
| [in,out] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment between elements of x. |
| [in,out] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment between elements of y. |
| [in] | param | device vector or host vector of 5 elements defining the rotation. param[0] = flag param[1] = H11 param[2] = H21 param[3] = H12 param[4] = H22 The flag parameter defines the form of H: flag = -1 => H = ( H11 H12 H21 H22 ) flag = 0 => H = ( 1.0 H12 H21 1.0 ) flag = 1 => H = ( H11 1.0 -1.0 H22 ) flag = -2 => H = ( 1.0 0.0 0.0 1.0 ) param may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode. |
| hipblasStatus_t hipblasSrotmBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| float *const | x[], | ||
| int | incx, | ||
| float *const | y[], | ||
| int | incy, | ||
| const float *const | param[], | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
rotmBatched applies the modified Givens rotation matrix defined by param_i to batched vectors x_i and y_i, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in the x and y vectors. |
| [in,out] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment between elements of each x_i. |
| [in,out] | y | device array of device pointers storing each vector y_1. |
| [in] | incy | [int] specifies the increment between elements of each y_i. |
| [in] | param | device array of device vectors of 5 elements defining the rotation. param[0] = flag param[1] = H11 param[2] = H21 param[3] = H12 param[4] = H22 The flag parameter defines the form of H: flag = -1 => H = ( H11 H12 H21 H22 ) flag = 0 => H = ( 1.0 H12 H21 1.0 ) flag = 1 => H = ( H11 1.0 -1.0 H22 ) flag = -2 => H = ( 1.0 0.0 0.0 1.0 ) param may ONLY be stored on the device for the batched version of this function. |
| [in] | batchCount | [int] the number of x and y arrays, i.e. the number of batches. |
| hipblasStatus_t hipblasSrotmg | ( | hipblasHandle_t | handle, |
| float * | d1, | ||
| float * | d2, | ||
| float * | x1, | ||
| const float * | y1, | ||
| float * | param | ||
| ) |
BLAS Level 1 API.
rotmg creates the modified Givens rotation matrix for the vector (d1 * x1, d2 * y1). Parameters may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode. If the pointer mode is set to HIPBLAS_POINTER_MODE_HOST, this function blocks the CPU until the GPU has finished and the results are available in host memory. If the pointer mode is set to HIPBLAS_POINTER_MODE_DEVICE, this function returns immediately and synchronization is required to read the results.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in,out] | d1 | device pointer or host pointer to input scalar that is overwritten. |
| [in,out] | d2 | device pointer or host pointer to input scalar that is overwritten. |
| [in,out] | x1 | device pointer or host pointer to input scalar that is overwritten. |
| [in] | y1 | device pointer or host pointer to input scalar. |
| [out] | param | device vector or host vector of 5 elements defining the rotation. param[0] = flag param[1] = H11 param[2] = H21 param[3] = H12 param[4] = H22 The flag parameter defines the form of H: flag = -1 => H = ( H11 H12 H21 H22 ) flag = 0 => H = ( 1.0 H12 H21 1.0 ) flag = 1 => H = ( H11 1.0 -1.0 H22 ) flag = -2 => H = ( 1.0 0.0 0.0 1.0 ) param may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode. |
| hipblasStatus_t hipblasSrotmgBatched | ( | hipblasHandle_t | handle, |
| float *const | d1[], | ||
| float *const | d2[], | ||
| float *const | x1[], | ||
| const float *const | y1[], | ||
| float *const | param[], | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
rotmgBatched creates the modified Givens rotation matrix for the batched vectors (d1_i * x1_i, d2_i * y1_i), for i = 1, ..., batchCount. Parameters may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode. If the pointer mode is set to HIPBLAS_POINTER_MODE_HOST, this function blocks the CPU until the GPU has finished and the results are available in host memory. If the pointer mode is set to HIPBLAS_POINTER_MODE_DEVICE, this function returns immediately and synchronization is required to read the results.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in,out] | d1 | device batched array or host batched array of input scalars that is overwritten. |
| [in,out] | d2 | device batched array or host batched array of input scalars that is overwritten. |
| [in,out] | x1 | device batched array or host batched array of input scalars that is overwritten. |
| [in] | y1 | device batched array or host batched array of input scalars. |
| [out] | param | device batched array or host batched array of vectors of 5 elements defining the rotation. param[0] = flag param[1] = H11 param[2] = H21 param[3] = H12 param[4] = H22 The flag parameter defines the form of H: flag = -1 => H = ( H11 H12 H21 H22 ) flag = 0 => H = ( 1.0 H12 H21 1.0 ) flag = 1 => H = ( H11 1.0 -1.0 H22 ) flag = -2 => H = ( 1.0 0.0 0.0 1.0 ) param may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode. |
| [in] | batchCount | [int] the number of instances in the batch. |
| hipblasStatus_t hipblasSrotmgStridedBatched | ( | hipblasHandle_t | handle, |
| float * | d1, | ||
| hipblasStride | strided1, | ||
| float * | d2, | ||
| hipblasStride | strided2, | ||
| float * | x1, | ||
| hipblasStride | stridex1, | ||
| const float * | y1, | ||
| hipblasStride | stridey1, | ||
| float * | param, | ||
| hipblasStride | strideParam, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
rotmgStridedBatched creates the modified Givens rotation matrix for the strided batched vectors (d1_i * x1_i, d2_i * y1_i), for i = 1, ..., batchCount. Parameters may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode. If the pointer mode is set to HIPBLAS_POINTER_MODE_HOST, this function blocks the CPU until the GPU has finished and the results are available in host memory. If the pointer mode is set to HIPBLAS_POINTER_MODE_DEVICE, this function returns immediately and synchronization is required to read the results.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in,out] | d1 | device strided_batched array or host strided_batched array of input scalars that is overwritten. |
| [in] | strided1 | [hipblasStride] specifies the increment between the beginning of d1_i and d1_(i+1) |
| [in,out] | d2 | device strided_batched array or host strided_batched array of input scalars that is overwritten. |
| [in] | strided2 | [hipblasStride] specifies the increment between the beginning of d2_i and d2_(i+1) |
| [in,out] | x1 | device strided_batched array or host strided_batched array of input scalars that is overwritten. |
| [in] | stridex1 | [hipblasStride] specifies the increment between the beginning of x1_i and x1_(i+1) |
| [in] | y1 | device strided_batched array or host strided_batched array of input scalars. |
| [in] | stridey1 | [hipblasStride] specifies the increment between the beginning of y1_i and y1_(i+1) |
| [out] | param | device stridedBatched array or host stridedBatched array of vectors of 5 elements defining the rotation. param[0] = flag param[1] = H11 param[2] = H21 param[3] = H12 param[4] = H22 The flag parameter defines the form of H: flag = -1 => H = ( H11 H12 H21 H22 ) flag = 0 => H = ( 1.0 H12 H21 1.0 ) flag = 1 => H = ( H11 1.0 -1.0 H22 ) flag = -2 => H = ( 1.0 0.0 0.0 1.0 ) param may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode. |
| [in] | strideParam | [hipblasStride] specifies the increment between the beginning of param_i and param_(i + 1) |
| [in] | batchCount | [int] the number of instances in the batch. |
| hipblasStatus_t hipblasSrotmStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| float * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| float * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| const float * | param, | ||
| hipblasStride | strideParam, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
rotmStridedBatched applies the modified Givens rotation matrix defined by param_i to strided batched vectors x_i and y_i, for i = 1, ..., batchCount
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in the x and y vectors. |
| [in,out] | x | device pointer pointing to first strided batched vector x_1. |
| [in] | incx | [int] specifies the increment between elements of each x_i. |
| [in] | stridex | [hipblasStride] specifies the increment between the beginning of x_i and x_(i + 1) |
| [in,out] | y | device pointer pointing to first strided batched vector y_1. |
| [in] | incy | [int] specifies the increment between elements of each y_i. |
| [in] | stridey | [hipblasStride] specifies the increment between the beginning of y_i and y_(i + 1) |
| [in] | param | device pointer pointing to first array of 5 elements defining the rotation (param_1). param[0] = flag param[1] = H11 param[2] = H21 param[3] = H12 param[4] = H22 The flag parameter defines the form of H: flag = -1 => H = ( H11 H12 H21 H22 ) flag = 0 => H = ( 1.0 H12 H21 1.0 ) flag = 1 => H = ( H11 1.0 -1.0 H22 ) flag = -2 => H = ( 1.0 0.0 0.0 1.0 ) param may ONLY be stored on the device for the strided_batched version of this function. |
| [in] | strideParam | [hipblasStride] specifies the increment between the beginning of param_i and param_(i + 1) |
| [in] | batchCount | [int] the number of x and y arrays, i.e. the number of batches. |
| hipblasStatus_t hipblasSrotStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| float * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| float * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| const float * | c, | ||
| const float * | s, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
rotStridedBatched applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to strided batched vectors x_i and y_i, for i = 1, ..., batchCount. Scalars c and s may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each x_i and y_i vectors. |
| [in,out] | x | device pointer to the first vector x_1. |
| [in] | incx | [int] specifies the increment between elements of each x_i. |
| [in] | stridex | [hipblasStride] specifies the increment from the beginning of x_i to the beginning of x_(i+1) |
| [in,out] | y | device pointer to the first vector y_1. |
| [in] | incy | [int] specifies the increment between elements of each y_i. |
| [in] | stridey | [hipblasStride] specifies the increment from the beginning of y_i to the beginning of y_(i+1) |
| [in] | c | device pointer or host pointer to scalar cosine component of the rotation matrix. |
| [in] | s | device pointer or host pointer to scalar sine component of the rotation matrix. |
| [in] | batchCount | [int] the number of x and y arrays, i.e. the number of batches. |
| hipblasStatus_t hipblasSsbmv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| int | k, | ||
| const float * | alpha, | ||
| const float * | AP, | ||
| int | lda, | ||
| const float * | x, | ||
| int | incx, | ||
| const float * | beta, | ||
| float * | y, | ||
| int | incy | ||
| ) |
BLAS Level 2 API.
sbmv performs the matrix-vector operation:
y := alpha*A*x + beta*y,
where alpha and beta are scalars, x and y are n element vectors and A should contain an upper or lower triangular n by n symmetric banded matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] |
| [in] | k | [int] specifies the number of sub- and super-diagonals |
| [in] | alpha | specifies the scalar alpha |
| [in] | AP | pointer storing matrix A on the GPU |
| [in] | lda | [int] specifies the leading dimension of matrix A |
| [in] | x | pointer storing vector x on the GPU |
| [in] | incx | [int] specifies the increment for the elements of x |
| [in] | beta | specifies the scalar beta |
| [out] | y | pointer storing vector y on the GPU |
| [in] | incy | [int] specifies the increment for the elements of y |
| hipblasStatus_t hipblasSsbmvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| int | k, | ||
| const float * | alpha, | ||
| const float *const | AP[], | ||
| int | lda, | ||
| const float *const | x[], | ||
| int | incx, | ||
| const float * | beta, | ||
| float *const | y[], | ||
| int | incy, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
sbmvBatched performs the matrix-vector operation:
y_i := alpha*A_i*x_i + beta*y_i,
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha and beta are scalars, x_i and y_i are vectors and A_i is an n by n symmetric banded matrix, for i = 1, ..., batchCount. A should contain an upper or lower triangular n by n symmetric banded matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] number of rows and columns of each matrix A_i |
| [in] | k | [int] specifies the number of sub- and super-diagonals |
| [in] | alpha | device pointer or host pointer to scalar alpha |
| [in] | AP | device array of device pointers storing each matrix A_i |
| [in] | lda | [int] specifies the leading dimension of each matrix A_i |
| [in] | x | device array of device pointers storing each vector x_i |
| [in] | incx | [int] specifies the increment for the elements of each vector x_i |
| [in] | beta | device pointer or host pointer to scalar beta |
| [out] | y | device array of device pointers storing each vector y_i |
| [in] | incy | [int] specifies the increment for the elements of each vector y_i |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasSsbmvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| int | k, | ||
| const float * | alpha, | ||
| const float * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const float * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const float * | beta, | ||
| float * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
sbmvStridedBatched performs the matrix-vector operation:
y_i := alpha*A_i*x_i + beta*y_i,
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha and beta are scalars, x_i and y_i are vectors and A_i is an n by n symmetric banded matrix, for i = 1, ..., batchCount. A should contain an upper or lower triangular n by n symmetric banded matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] number of rows and columns of each matrix A_i |
| [in] | k | [int] specifies the number of sub- and super-diagonals |
| [in] | alpha | device pointer or host pointer to scalar alpha |
| [in] | AP | Device pointer to the first matrix A_1 on the GPU |
| [in] | lda | [int] specifies the leading dimension of each matrix A_i |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | x | Device pointer to the first vector x_1 on the GPU |
| [in] | incx | [int] specifies the increment for the elements of each vector x_i |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stridex, however the user should take care to ensure that stridex is of appropriate size. This typically means stridex >= n * incx. stridex should be non zero. |
| [in] | beta | device pointer or host pointer to scalar beta |
| [out] | y | Device pointer to the first vector y_1 on the GPU |
| [in] | incy | [int] specifies the increment for the elements of each vector y_i |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1). There are no restrictions placed on stridey, however the user should take care to ensure that stridey is of appropriate size. This typically means stridey >= n * incy. stridey should be non zero. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasSscal | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const float * | alpha, | ||
| float * | x, | ||
| int | incx | ||
| ) |
BLAS Level 1 API.
scal scales each element of vector x with scalar alpha.
x := alpha * x
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x. |
| [in] | alpha | device pointer or host pointer for the scalar alpha. |
| [in,out] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| hipblasStatus_t hipblasSscalBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const float * | alpha, | ||
| float *const | x[], | ||
| int | incx, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
scalBatched scales each element of vector x_i with scalar alpha, for i = 1, ... , batchCount.
x_i := alpha * x_i
where (x_i) is the i-th instance of the batch.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i. |
| [in] | alpha | host pointer or device pointer for the scalar alpha. |
| [in,out] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | batchCount | [int] specifies the number of batches in x. |
| hipblasStatus_t hipblasSscalStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const float * | alpha, | ||
| float * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
scalStridedBatched scales each element of vector x_i with scalar alpha, for i = 1, ... , batchCount.
x_i := alpha * x_i ,
where (x_i) is the i-th instance of the batch.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i. |
| [in] | alpha | host pointer or device pointer for the scalar alpha. |
| [in,out] | x | device pointer to the first vector (x_1) in the batch. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stride_x, however the user should take care to ensure that stride_x is of appropriate size, for a typical case this means stride_x >= n * incx. |
| [in] | batchCount | [int] specifies the number of batches in x. |
| hipblasStatus_t hipblasSspmv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float * | AP, | ||
| const float * | x, | ||
| int | incx, | ||
| const float * | beta, | ||
| float * | y, | ||
| int | incy | ||
| ) |
BLAS Level 2 API.
spmv performs the matrix-vector operation:
y := alpha*A*x + beta*y,
where alpha and beta are scalars, x and y are n element vectors and A should contain an upper or lower triangular n by n packed symmetric matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] |
| [in] | alpha | specifies the scalar alpha |
| [in] | AP | pointer storing matrix A on the GPU |
| [in] | x | pointer storing vector x on the GPU |
| [in] | incx | [int] specifies the increment for the elements of x |
| [in] | beta | specifies the scalar beta |
| [out] | y | pointer storing vector y on the GPU |
| [in] | incy | [int] specifies the increment for the elements of y |
| hipblasStatus_t hipblasSspmvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float *const | AP[], | ||
| const float *const | x[], | ||
| int | incx, | ||
| const float * | beta, | ||
| float *const | y[], | ||
| int | incy, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
spmvBatched performs the matrix-vector operation:
y_i := alpha*AP_i*x_i + beta*y_i,
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha and beta are scalars, x_i and y_i are vectors and A_i is an n by n symmetric matrix, for i = 1, ..., batchCount. A should contain an upper or lower triangular n by n packed symmetric matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] number of rows and columns of each matrix A_i |
| [in] | alpha | device pointer or host pointer to scalar alpha |
| [in] | AP | device array of device pointers storing each matrix A_i |
| [in] | x | device array of device pointers storing each vector x_i |
| [in] | incx | [int] specifies the increment for the elements of each vector x_i |
| [in] | beta | device pointer or host pointer to scalar beta |
| [out] | y | device array of device pointers storing each vector y_i |
| [in] | incy | [int] specifies the increment for the elements of each vector y_i |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasSspmvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float * | AP, | ||
| hipblasStride | strideA, | ||
| const float * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const float * | beta, | ||
| float * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
spmvStridedBatched performs the matrix-vector operation:
y_i := alpha*A_i*x_i + beta*y_i,
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha and beta are scalars, x_i and y_i are vectors and A_i is an n by n symmetric matrix, for i = 1, ..., batchCount. A should contain an upper or lower triangular n by n packed symmetric matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] number of rows and columns of each matrix A_i |
| [in] | alpha | device pointer or host pointer to scalar alpha |
| [in] | AP | Device pointer to the first matrix A_1 on the GPU |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | x | Device pointer to the first vector x_1 on the GPU |
| [in] | incx | [int] specifies the increment for the elements of each vector x_i |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stridex, however the user should take care to ensure that stridex is of appropriate size. This typically means stridex >= n * incx. stridex should be non zero. |
| [in] | beta | device pointer or host pointer to scalar beta |
| [out] | y | Device pointer to the first vector y_1 on the GPU |
| [in] | incy | [int] specifies the increment for the elements of each vector y_i |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1). There are no restrictions placed on stridey, however the user should take care to ensure that stridey is of appropriate size. This typically means stridey >= n * incy. stridey should be non zero. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasSspr | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float * | x, | ||
| int | incx, | ||
| float * | AP | ||
| ) |
BLAS Level 2 API.
spr performs the matrix-vector operations
A := A + alpha*x*x**T
where alpha is a scalar, x is a vector, and A is an n by n symmetric matrix, supplied in packed form.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of A is supplied in AP. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of A is supplied in AP. |
| [in] | n | [int] the number of rows and columns of matrix A, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in,out] | AP | device pointer storing the packed version of the specified triangular portion of the symmetric matrix A. Of at least size ((n * (n + 1)) / 2). if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of the symmetric matrix A is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(0,1) AP(2) = A(1,1), etc. Ex: (HIPBLAS_FILL_MODE_UPPER; n = 4) 1 2 4 7 2 3 5 8 --—> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] 4 5 6 9 7 8 9 0 if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of the symmetric matrix A is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(1,0) AP(2) = A(2,1), etc. Ex: (HIPBLAS_FILL_MODE_LOWER; n = 4) 1 2 3 4 2 5 6 7 --—> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] 3 6 8 9 4 7 9 0 |
| hipblasStatus_t hipblasSspr2 | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float * | x, | ||
| int | incx, | ||
| const float * | y, | ||
| int | incy, | ||
| float * | AP | ||
| ) |
BLAS Level 2 API.
spr2 performs the matrix-vector operation
A := A + alpha*x*y**T + alpha*y*x**T
where alpha is a scalar, x and y are vectors, and A is an n by n symmetric matrix, supplied in packed form.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of A is supplied in AP. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of A is supplied in AP. |
| [in] | n | [int] the number of rows and columns of matrix A, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in,out] | AP | device pointer storing the packed version of the specified triangular portion of the symmetric matrix A. Of at least size ((n * (n + 1)) / 2). if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of the symmetric matrix A is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(0,1) AP(2) = A(1,1), etc. Ex: (HIPBLAS_FILL_MODE_UPPER; n = 4) 1 2 4 7 2 3 5 8 --—> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] 4 5 6 9 7 8 9 0 if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of the symmetric matrix A is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(1,0) AP(n) = A(2,1), etc. Ex: (HIPBLAS_FILL_MODE_LOWER; n = 4) 1 2 3 4 2 5 6 7 --—> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] 3 6 8 9 4 7 9 0 |
| hipblasStatus_t hipblasSspr2Batched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float *const | x[], | ||
| int | incx, | ||
| const float *const | y[], | ||
| int | incy, | ||
| float *const | AP[], | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
spr2Batched performs the matrix-vector operation
A_i := A_i + alpha*x_i*y_i**T + alpha*y_i*x_i**T
where alpha is a scalar, x_i and y_i are vectors, and A_i is an n by n symmetric matrix, supplied in packed form, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied in AP. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied in AP. |
| [in] | n | [int] the number of rows and columns of each matrix A_i, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in,out] | AP | device array of device pointers storing the packed version of the specified triangular portion of each symmetric matrix A_i of at least size ((n * (n + 1)) / 2). Array is of at least size batchCount. if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of each symmetric matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(0,1) AP(2) = A(1,1), etc. Ex: (HIPBLAS_FILL_MODE_UPPER; n = 4) 1 2 4 7 2 3 5 8 --—> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] 4 5 6 9 7 8 9 0 if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of each symmetric matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(1,0) AP(n) = A(2,1), etc. Ex: (HIPBLAS_FILL_MODE_LOWER; n = 4) 1 2 3 4 2 5 6 7 --—> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] 3 6 8 9 4 7 9 0 |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasSspr2StridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const float * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| float * | AP, | ||
| hipblasStride | strideA, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
spr2StridedBatched performs the matrix-vector operation
A_i := A_i + alpha*x_i*y_i**T + alpha*y_i*x_i**T
where alpha is a scalar, x_i amd y_i are vectors, and A_i is an n by n symmetric matrix, supplied in packed form, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied in AP. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied in AP. |
| [in] | n | [int] the number of rows and columns of each matrix A_i, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer pointing to the first vector (x_1). |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). |
| [in] | y | device pointer pointing to the first vector (y_1). |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1). |
| [in,out] | AP | device pointer storing the packed version of the specified triangular portion of each symmetric matrix A_i. Points to the first A_1. if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of each symmetric matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(0,1) AP(2) = A(1,1), etc. Ex: (HIPBLAS_FILL_MODE_UPPER; n = 4) 1 2 4 7 2 3 5 8 --—> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] 4 5 6 9 7 8 9 0 if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of each symmetric matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(1,0) AP(n) = A(2,1), etc. Ex: (HIPBLAS_FILL_MODE_LOWER; n = 4) 1 2 3 4 2 5 6 7 --—> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] 3 6 8 9 4 7 9 0 |
| [in] | strideA | [hipblasStride] stride from the start of one (A_i) and the next (A_i+1) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasSsprBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float *const | x[], | ||
| int | incx, | ||
| float *const | AP[], | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
sprBatched performs the matrix-vector operations
A_i := A_i + alpha*x_i*x_i**T
where alpha is a scalar, x_i is a vector, and A_i is an n by n symmetric matrix, supplied in packed form, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied in AP. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied in AP. |
| [in] | n | [int] the number of rows and columns of each matrix A_i, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in,out] | AP | device array of device pointers storing the packed version of the specified triangular portion of each symmetric matrix A_i of at least size ((n * (n + 1)) / 2). Array is of at least size batchCount. if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of each symmetric matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(0,1) AP(2) = A(1,1), etc. Ex: (HIPBLAS_FILL_MODE_UPPER; n = 4) 1 2 4 7 2 3 5 8 --—> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] 4 5 6 9 7 8 9 0 if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of each symmetric matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(1,0) AP(2) = A(2,1), etc. Ex: (HIPBLAS_FILL_MODE_LOWER; n = 4) 1 2 3 4 2 5 6 7 --—> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] 3 6 8 9 4 7 9 0 |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasSsprStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| float * | AP, | ||
| hipblasStride | strideA, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
sprStridedBatched performs the matrix-vector operations
A_i := A_i + alpha*x_i*x_i**T
where alpha is a scalar, x_i is a vector, and A_i is an n by n symmetric matrix, supplied in packed form, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied in AP. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied in AP. |
| [in] | n | [int] the number of rows and columns of each matrix A_i, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer pointing to the first vector (x_1). |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). |
| [in,out] | AP | device pointer storing the packed version of the specified triangular portion of each symmetric matrix A_i. Points to the first A_1. if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of each symmetric matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(0,1) AP(2) = A(1,1), etc. Ex: (HIPBLAS_FILL_MODE_UPPER; n = 4) 1 2 4 7 2 3 5 8 --—> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] 4 5 6 9 7 8 9 0 if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of each symmetric matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(1,0) AP(2) = A(2,1), etc. Ex: (HIPBLAS_FILL_MODE_LOWER; n = 4) 1 2 3 4 2 5 6 7 --—> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] 3 6 8 9 4 7 9 0 |
| [in] | strideA | [hipblasStride] stride from the start of one (A_i) and the next (A_i+1) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasSswap | ( | hipblasHandle_t | handle, |
| int | n, | ||
| float * | x, | ||
| int | incx, | ||
| float * | y, | ||
| int | incy | ||
| ) |
BLAS Level 1 API.
swap interchanges vectors x and y.
y := x; x := y
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x and y. |
| [in,out] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in,out] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| hipblasStatus_t hipblasSswapBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| float *const | x[], | ||
| int | incx, | ||
| float *const | y[], | ||
| int | incy, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
swapBatched interchanges vectors x_i and y_i, for i = 1 , ... , batchCount
y_i := x_i; x_i := y_i
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i and y_i. |
| [in,out] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in,out] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasSswapStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| float * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| float * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
swapStridedBatched interchanges vectors x_i and y_i, for i = 1 , ... , batchCount
y_i := x_i; x_i := y_i
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i and y_i. |
| [in,out] | x | device pointer to the first vector x_1. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stride_x, however the user should take care to ensure that stride_x is of appropriate size, for a typical case this means stride_x >= n * incx. |
| [in,out] | y | device pointer to the first vector y_1. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1). There are no restrictions placed on stride_x, however the user should take care to ensure that stride_y is of appropriate size, for a typical case this means stride_y >= n * incy. stridey should be non zero. |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasSsymm | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| int | m, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float * | AP, | ||
| int | lda, | ||
| const float * | BP, | ||
| int | ldb, | ||
| const float * | beta, | ||
| float * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
symm performs one of the matrix-matrix operations:
C := alpha*A*B + beta*C if side == HIPBLAS_SIDE_LEFT, C := alpha*B*A + beta*C if side == HIPBLAS_SIDE_RIGHT,
where alpha and beta are scalars, B and C are m by n matrices, and A is a symmetric matrix stored as either upper or lower.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] HIPBLAS_SIDE_LEFT: C := alpha*A*B + beta*C HIPBLAS_SIDE_RIGHT: C := alpha*B*A + beta*C |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix |
| [in] | m | [int] m specifies the number of rows of B and C. m >= 0. |
| [in] | n | [int] n specifies the number of columns of B and C. n >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A and B are not referenced. |
| [in] | AP | pointer storing matrix A on the GPU. A is m by m if side == HIPBLAS_SIDE_LEFT A is n by n if side == HIPBLAS_SIDE_RIGHT only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of A. if side = HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), otherwise lda >= max( 1, n ). |
| [in] | BP | pointer storing matrix B on the GPU. Matrix dimension is m by n |
| [in] | ldb | [int] ldb specifies the first dimension of B. ldb >= max( 1, m ) |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | pointer storing matrix C on the GPU. Matrix dimension is m by n |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, m ) |
| hipblasStatus_t hipblasSsymmBatched | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| int | m, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float *const | AP[], | ||
| int | lda, | ||
| const float *const | BP[], | ||
| int | ldb, | ||
| const float * | beta, | ||
| float *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
symmBatched performs a batch of the matrix-matrix operations:
C_i := alpha*A_i*B_i + beta*C_i if side == HIPBLAS_SIDE_LEFT, C_i := alpha*B_i*A_i + beta*C_i if side == HIPBLAS_SIDE_RIGHT,
where alpha and beta are scalars, B_i and C_i are m by n matrices, and A_i is a symmetric matrix stored as either upper or lower.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] HIPBLAS_SIDE_LEFT: C_i := alpha*A_i*B_i + beta*C_i HIPBLAS_SIDE_RIGHT: C_i := alpha*B_i*A_i + beta*C_i |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: A_i is a lower triangular matrix |
| [in] | m | [int] m specifies the number of rows of B_i and C_i. m >= 0. |
| [in] | n | [int] n specifies the number of columns of B_i and C_i. n >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A_i and B_i are not referenced. |
| [in] | AP | device array of device pointers storing each matrix A_i on the GPU. A_i is m by m if side == HIPBLAS_SIDE_LEFT A_i is n by n if side == HIPBLAS_SIDE_RIGHT only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of A_i. if side = HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), otherwise lda >= max( 1, n ). |
| [in] | BP | device array of device pointers storing each matrix B_i on the GPU. Matrix dimension is m by n |
| [in] | ldb | [int] ldb specifies the first dimension of B_i. ldb >= max( 1, m ) |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C_i need not be set before entry. |
| [in] | CP | device array of device pointers storing each matrix C_i on the GPU. Matrix dimension is m by n |
| [in] | ldc | [int] ldc specifies the first dimension of C_i. ldc >= max( 1, m ) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasSsymmStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| int | m, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const float * | BP, | ||
| int | ldb, | ||
| hipblasStride | strideB, | ||
| const float * | beta, | ||
| float * | CP, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
symmStridedBatched performs a batch of the matrix-matrix operations:
C_i := alpha*A_i*B_i + beta*C_i if side == HIPBLAS_SIDE_LEFT, C_i := alpha*B_i*A_i + beta*C_i if side == HIPBLAS_SIDE_RIGHT,
where alpha and beta are scalars, B_i and C_i are m by n matrices, and A_i is a symmetric matrix stored as either upper or lower.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] HIPBLAS_SIDE_LEFT: C_i := alpha*A_i*B_i + beta*C_i HIPBLAS_SIDE_RIGHT: C_i := alpha*B_i*A_i + beta*C_i |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: A_i is a lower triangular matrix |
| [in] | m | [int] m specifies the number of rows of B_i and C_i. m >= 0. |
| [in] | n | [int] n specifies the number of columns of B_i and C_i. n >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A_i and B_i are not referenced. |
| [in] | AP | device pointer to first matrix A_1 A_i is m by m if side == HIPBLAS_SIDE_LEFT A_i is n by n if side == HIPBLAS_SIDE_RIGHT only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of A_i. if side = HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), otherwise lda >= max( 1, n ). |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | BP | device pointer to first matrix B_1 of dimension (ldb, n) on the GPU. |
| [in] | ldb | [int] ldb specifies the first dimension of B_i. ldb >= max( 1, m ) |
| [in] | strideB | [hipblasStride] stride from the start of one matrix (B_i) and the next one (B_i+1) |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | device pointer to first matrix C_1 of dimension (ldc, n) on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, m ). |
| [in,out] | strideC | [hipblasStride] stride from the start of one matrix (C_i) and the next one (C_i+1) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasSsymv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float * | AP, | ||
| int | lda, | ||
| const float * | x, | ||
| int | incx, | ||
| const float * | beta, | ||
| float * | y, | ||
| int | incy | ||
| ) |
BLAS Level 2 API.
symv performs the matrix-vector operation:
y := alpha*A*x + beta*y,
where alpha and beta are scalars, x and y are n element vectors and A should contain an upper or lower triangular n by n symmetric matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] |
| [in] | alpha | specifies the scalar alpha |
| [in] | AP | pointer storing matrix A on the GPU |
| [in] | lda | [int] specifies the leading dimension of A |
| [in] | x | pointer storing vector x on the GPU |
| [in] | incx | [int] specifies the increment for the elements of x |
| [in] | beta | specifies the scalar beta |
| [out] | y | pointer storing vector y on the GPU |
| [in] | incy | [int] specifies the increment for the elements of y |
| hipblasStatus_t hipblasSsymvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float *const | AP[], | ||
| int | lda, | ||
| const float *const | x[], | ||
| int | incx, | ||
| const float * | beta, | ||
| float *const | y[], | ||
| int | incy, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
symvBatched performs the matrix-vector operation:
y_i := alpha*A_i*x_i + beta*y_i,
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha and beta are scalars, x_i and y_i are vectors and A_i is an n by n symmetric matrix, for i = 1, ..., batchCount. A a should contain an upper or lower triangular symmetric matrix and the opposing triangular part of A is not referenced
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] number of rows and columns of each matrix A_i |
| [in] | alpha | device pointer or host pointer to scalar alpha |
| [in] | AP | device array of device pointers storing each matrix A_i |
| [in] | lda | [int] specifies the leading dimension of each matrix A_i |
| [in] | x | device array of device pointers storing each vector x_i |
| [in] | incx | [int] specifies the increment for the elements of each vector x_i |
| [in] | beta | device pointer or host pointer to scalar beta |
| [out] | y | device array of device pointers storing each vector y_i |
| [in] | incy | [int] specifies the increment for the elements of each vector y_i |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasSsymvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const float * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const float * | beta, | ||
| float * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
symvStridedBatched performs the matrix-vector operation:
y_i := alpha*A_i*x_i + beta*y_i,
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha and beta are scalars, x_i and y_i are vectors and A_i is an n by n symmetric matrix, for i = 1, ..., batchCount. A a should contain an upper or lower triangular symmetric matrix and the opposing triangular part of A is not referenced
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] number of rows and columns of each matrix A_i |
| [in] | alpha | device pointer or host pointer to scalar alpha |
| [in] | AP | Device pointer to the first matrix A_1 on the GPU |
| [in] | lda | [int] specifies the leading dimension of each matrix A_i |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | x | Device pointer to the first vector x_1 on the GPU |
| [in] | incx | [int] specifies the increment for the elements of each vector x_i |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stridex, however the user should take care to ensure that stridex is of appropriate size. This typically means stridex >= n * incx. stridex should be non zero. |
| [in] | beta | device pointer or host pointer to scalar beta |
| [out] | y | Device pointer to the first vector y_1 on the GPU |
| [in] | incy | [int] specifies the increment for the elements of each vector y_i |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1). There are no restrictions placed on stridey, however the user should take care to ensure that stridey is of appropriate size. This typically means stridey >= n * incy. stridey should be non zero. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasSsyr | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float * | x, | ||
| int | incx, | ||
| float * | AP, | ||
| int | lda | ||
| ) |
BLAS Level 2 API.
syr performs the matrix-vector operations
A := A + alpha*x*x**T
where alpha is a scalar, x is a vector, and A is an n by n symmetric matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] the number of rows and columns of matrix A. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in,out] | AP | device pointer storing matrix A. |
| [in] | lda | [int] specifies the leading dimension of A. |
| hipblasStatus_t hipblasSsyr2 | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float * | x, | ||
| int | incx, | ||
| const float * | y, | ||
| int | incy, | ||
| float * | AP, | ||
| int | lda | ||
| ) |
BLAS Level 2 API.
syr2 performs the matrix-vector operations
A := A + alpha*x*y**T + alpha*y*x**T
where alpha is a scalar, x and y are vectors, and A is an n by n symmetric matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] the number of rows and columns of matrix A. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in,out] | AP | device pointer storing matrix A. |
| [in] | lda | [int] specifies the leading dimension of A. |
| hipblasStatus_t hipblasSsyr2Batched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float *const | x[], | ||
| int | incx, | ||
| const float *const | y[], | ||
| int | incy, | ||
| float *const | AP[], | ||
| int | lda, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
syr2Batched performs a batch of matrix-vector operations
A[i] := A[i] + alpha*x[i]*y[i]**T + alpha*y[i]*x[i]**T
where alpha is a scalar, x[i] and y[i] are vectors, and A[i] is a n by n symmetric matrix, for i = 1 , ... , batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] the number of rows and columns of matrix A. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in,out] | AP | device array of device pointers storing each matrix A_i. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasSsyr2k | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const float * | alpha, | ||
| const float * | AP, | ||
| int | lda, | ||
| const float * | BP, | ||
| int | ldb, | ||
| const float * | beta, | ||
| float * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
syr2k performs one of the matrix-matrix operations for a symmetric rank-2k update
C := alpha*(op( A )*op( B )^T + op( B )*op( A )^T) + beta*C
where alpha and beta are scalars, op(A) and op(B) are n by k matrix, and C is a symmetric n x n matrix stored as either upper or lower.
op( A ) = A, op( B ) = B, and A and B are n by k if trans == HIPBLAS_OP_N op( A ) = A^T, op( B ) = B^T, and A and B are k by n if trans == HIPBLAS_OP_T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_T: op( A ) = A^T, op( B ) = B^T HIPBLAS_OP_N: op( A ) = A, op( B ) = B |
| [in] | n | [int] n specifies the number of rows and columns of C. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A) and op(B). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | pointer storing matrix A on the GPU. Martrix dimension is ( lda, k ) when if trans = HIPBLAS_OP_N, otherwise (lda, n) only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of A. if trans = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | BP | pointer storing matrix B on the GPU. Martrix dimension is ( ldb, k ) when if trans = HIPBLAS_OP_N, otherwise (ldb, n) only the upper/lower triangular part is accessed. |
| [in] | ldb | [int] ldb specifies the first dimension of B. if trans = HIPBLAS_OP_N, ldb >= max( 1, n ), otherwise ldb >= max( 1, k ). |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | pointer storing matrix C on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| hipblasStatus_t hipblasSsyr2kBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const float * | alpha, | ||
| const float *const | AP[], | ||
| int | lda, | ||
| const float *const | BP[], | ||
| int | ldb, | ||
| const float * | beta, | ||
| float *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
syr2kBatched performs a batch of the matrix-matrix operations for a symmetric rank-2k update
C_i := alpha*(op( A_i )*op( B_i )^T + op( B_i )*op( A_i )^T) + beta*C_i
where alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrix, and C_i is a symmetric n x n matrix stored as either upper or lower.
op( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == HIPBLAS_OP_N op( A_i ) = A_i^T, op( B_i ) = B_i^T, and A_i and B_i are k by n if trans == HIPBLAS_OP_T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C_i is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_T: op( A_i ) = A_i^T, op( B_i ) = B_i^T HIPBLAS_OP_N: op( A_i ) = A_i, op( B_i ) = B_i |
| [in] | n | [int] n specifies the number of rows and columns of C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | device array of device pointers storing each matrix_i A of dimension (lda, k) when trans is HIPBLAS_OP_N, otherwise of dimension (lda, n) |
| [in] | lda | [int] lda specifies the first dimension of A_i. if trans = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | BP | device array of device pointers storing each matrix_i B of dimension (ldb, k) when trans is HIPBLAS_OP_N, otherwise of dimension (ldb, n) |
| [in] | ldb | [int] ldb specifies the first dimension of B. if trans = HIPBLAS_OP_N, ldb >= max( 1, n ), otherwise ldb >= max( 1, k ). |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | device array of device pointers storing each matrix C_i on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasSsyr2kStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const float * | alpha, | ||
| const float * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const float * | BP, | ||
| int | ldb, | ||
| hipblasStride | strideB, | ||
| const float * | beta, | ||
| float * | CP, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
syr2kStridedBatched performs a batch of the matrix-matrix operations for a symmetric rank-2k update
C_i := alpha*(op( A_i )*op( B_i )^T + op( B_i )*op( A_i )^T) + beta*C_i
where alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrix, and C_i is a symmetric n x n matrix stored as either upper or lower.
op( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == HIPBLAS_OP_N op( A_i ) = A_i^T, op( B_i ) = B_i^T, and A_i and B_i are k by n if trans == HIPBLAS_OP_T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C_i is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_T: op( A_i ) = A_i^T, op( B_i ) = B_i^T HIPBLAS_OP_N: op( A_i ) = A_i, op( B_i ) = B_i |
| [in] | n | [int] n specifies the number of rows and columns of C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | Device pointer to the first matrix A_1 on the GPU of dimension (lda, k) when trans is HIPBLAS_OP_N, otherwise of dimension (lda, n) |
| [in] | lda | [int] lda specifies the first dimension of A_i. if trans = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | BP | Device pointer to the first matrix B_1 on the GPU of dimension (ldb, k) when trans is HIPBLAS_OP_N, otherwise of dimension (ldb, n) |
| [in] | ldb | [int] ldb specifies the first dimension of B_i. if trans = HIPBLAS_OP_N, ldb >= max( 1, n ), otherwise ldb >= max( 1, k ). |
| [in] | strideB | [hipblasStride] stride from the start of one matrix (B_i) and the next one (B_i+1) |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | Device pointer to the first matrix C_1 on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| [in,out] | strideC | [hipblasStride] stride from the start of one matrix (C_i) and the next one (C_i+1) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasSsyr2StridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const float * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| float * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
syr2StridedBatched the matrix-vector operations
A[i] := A[i] + alpha*x[i]*y[i]**T + alpha*y[i]*x[i]**T
where alpha is a scalar, x[i] and y[i] are vectors, and A[i] is a n by n symmetric matrices, for i = 1 , ... , batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] the number of rows and columns of each matrix A. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer to the first vector x_1. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] specifies the pointer increment between vectors (x_i) and (x_i+1). |
| [in] | y | device pointer to the first vector y_1. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | stridey | [hipblasStride] specifies the pointer increment between vectors (y_i) and (y_i+1). |
| [in,out] | AP | device pointer to the first matrix A_1. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasSsyrBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float *const | x[], | ||
| int | incx, | ||
| float *const | AP[], | ||
| int | lda, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
syrBatched performs a batch of matrix-vector operations
A[i] := A[i] + alpha*x[i]*x[i]**T
where alpha is a scalar, x is an array of vectors, and A is an array of n by n symmetric matrices, for i = 1 , ... , batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] the number of rows and columns of matrix A. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in,out] | AP | device array of device pointers storing each matrix A_i. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasSsyrk | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const float * | alpha, | ||
| const float * | AP, | ||
| int | lda, | ||
| const float * | beta, | ||
| float * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
syrk performs one of the matrix-matrix operations for a symmetric rank-k update
C := alpha*op( A )*op( A )^T + beta*C
where alpha and beta are scalars, op(A) is an n by k matrix, and C is a symmetric n x n matrix stored as either upper or lower.
op( A ) = A, and A is n by k if transA == HIPBLAS_OP_N op( A ) = A^T and A is k by n if transA == HIPBLAS_OP_T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_T: op(A) = A^T HIPBLAS_OP_N: op(A) = A HIPBLAS_OP_C: op(A) = A^T |
HIPBLAS_OP_C is not supported for complex types, see cherk and zherk.
| [in] | n | [int] n specifies the number of rows and columns of C. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | pointer storing matrix A on the GPU. Martrix dimension is ( lda, k ) when if transA = HIPBLAS_OP_N, otherwise (lda, n) only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of A. if transA = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | pointer storing matrix C on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| hipblasStatus_t hipblasSsyrkBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const float * | alpha, | ||
| const float *const | AP[], | ||
| int | lda, | ||
| const float * | beta, | ||
| float *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
syrkBatched performs a batch of the matrix-matrix operations for a symmetric rank-k update
C_i := alpha*op( A_i )*op( A_i )^T + beta*C_i
where alpha and beta are scalars, op(A_i) is an n by k matrix, and C_i is a symmetric n x n matrix stored as either upper or lower.
op( A_i ) = A_i, and A_i is n by k if transA == HIPBLAS_OP_N op( A_i ) = A_i^T and A_i is k by n if transA == HIPBLAS_OP_T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C_i is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_T: op(A) = A^T HIPBLAS_OP_N: op(A) = A HIPBLAS_OP_C: op(A) = A^T |
HIPBLAS_OP_C is not supported for complex types, see cherk and zherk.
| [in] | n | [int] n specifies the number of rows and columns of C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | device array of device pointers storing each matrix_i A of dimension (lda, k) when transA is HIPBLAS_OP_N, otherwise of dimension (lda, n) |
| [in] | lda | [int] lda specifies the first dimension of A_i. if transA = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | device array of device pointers storing each matrix C_i on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasSsyrkStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const float * | alpha, | ||
| const float * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const float * | beta, | ||
| float * | CP, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
syrkStridedBatched performs a batch of the matrix-matrix operations for a symmetric rank-k update
C_i := alpha*op( A_i )*op( A_i )^T + beta*C_i
where alpha and beta are scalars, op(A_i) is an n by k matrix, and C_i is a symmetric n x n matrix stored as either upper or lower.
op( A_i ) = A_i, and A_i is n by k if transA == HIPBLAS_OP_N op( A_i ) = A_i^T and A_i is k by n if transA == HIPBLAS_OP_T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C_i is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_T: op(A) = A^T HIPBLAS_OP_N: op(A) = A HIPBLAS_OP_C: op(A) = A^T |
HIPBLAS_OP_C is not supported for complex types, see cherk and zherk.
| [in] | n | [int] n specifies the number of rows and columns of C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | Device pointer to the first matrix A_1 on the GPU of dimension (lda, k) when transA is HIPBLAS_OP_N, otherwise of dimension (lda, n) |
| [in] | lda | [int] lda specifies the first dimension of A_i. if transA = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | Device pointer to the first matrix C_1 on the GPU. on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| [in,out] | strideC | [hipblasStride] stride from the start of one matrix (C_i) and the next one (C_i+1) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasSsyrkx | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const float * | alpha, | ||
| const float * | AP, | ||
| int | lda, | ||
| const float * | BP, | ||
| int | ldb, | ||
| const float * | beta, | ||
| float * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
syrkx performs one of the matrix-matrix operations for a symmetric rank-k update
C := alpha*op( A )*op( B )^T + beta*C
where alpha and beta are scalars, op(A) and op(B) are n by k matrix, and C is a symmetric n x n matrix stored as either upper or lower. This routine should only be used when the caller can guarantee that the result of op( A )*op( B )^T will be symmetric.
op( A ) = A, op( B ) = B, and A and B are n by k if trans == HIPBLAS_OP_N op( A ) = A^T, op( B ) = B^T, and A and B are k by n if trans == HIPBLAS_OP_T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_T: op( A ) = A^T, op( B ) = B^T HIPBLAS_OP_N: op( A ) = A, op( B ) = B |
| [in] | n | [int] n specifies the number of rows and columns of C. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A) and op(B). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | pointer storing matrix A on the GPU. Martrix dimension is ( lda, k ) when if trans = HIPBLAS_OP_N, otherwise (lda, n) only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of A. if trans = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | BP | pointer storing matrix B on the GPU. Martrix dimension is ( ldb, k ) when if trans = HIPBLAS_OP_N, otherwise (ldb, n) only the upper/lower triangular part is accessed. |
| [in] | ldb | [int] ldb specifies the first dimension of B. if trans = HIPBLAS_OP_N, ldb >= max( 1, n ), otherwise ldb >= max( 1, k ). |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | pointer storing matrix C on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| hipblasStatus_t hipblasSsyrkxBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const float * | alpha, | ||
| const float *const | AP[], | ||
| int | lda, | ||
| const float *const | BP[], | ||
| int | ldb, | ||
| const float * | beta, | ||
| float *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
syrkxBatched performs a batch of the matrix-matrix operations for a symmetric rank-k update
C_i := alpha*op( A_i )*op( B_i )^T + beta*C_i
where alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrix, and C_i is a symmetric n x n matrix stored as either upper or lower. This routine should only be used when the caller can guarantee that the result of op( A_i )*op( B_i )^T will be symmetric.
op( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == HIPBLAS_OP_N op( A_i ) = A_i^T, op( B_i ) = B_i^T, and A_i and B_i are k by n if trans == HIPBLAS_OP_T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C_i is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_T: op( A_i ) = A_i^T, op( B_i ) = B_i^T HIPBLAS_OP_N: op( A_i ) = A_i, op( B_i ) = B_i |
| [in] | n | [int] n specifies the number of rows and columns of C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | device array of device pointers storing each matrix_i A of dimension (lda, k) when trans is HIPBLAS_OP_N, otherwise of dimension (lda, n) |
| [in] | lda | [int] lda specifies the first dimension of A_i. if trans = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | BP | device array of device pointers storing each matrix_i B of dimension (ldb, k) when trans is HIPBLAS_OP_N, otherwise of dimension (ldb, n) |
| [in] | ldb | [int] ldb specifies the first dimension of B. if trans = HIPBLAS_OP_N, ldb >= max( 1, n ), otherwise ldb >= max( 1, k ). |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | device array of device pointers storing each matrix C_i on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasSsyrkxStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const float * | alpha, | ||
| const float * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const float * | BP, | ||
| int | ldb, | ||
| hipblasStride | strideB, | ||
| const float * | beta, | ||
| float * | CP, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
syrkxStridedBatched performs a batch of the matrix-matrix operations for a symmetric rank-k update
C_i := alpha*op( A_i )*op( B_i )^T + beta*C_i
where alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrix, and C_i is a symmetric n x n matrix stored as either upper or lower. This routine should only be used when the caller can guarantee that the result of op( A_i )*op( B_i )^T will be symmetric.
op( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == HIPBLAS_OP_N op( A_i ) = A_i^T, op( B_i ) = B_i^T, and A_i and B_i are k by n if trans == HIPBLAS_OP_T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C_i is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_T: op( A_i ) = A_i^T, op( B_i ) = B_i^T HIPBLAS_OP_N: op( A_i ) = A_i, op( B_i ) = B_i |
| [in] | n | [int] n specifies the number of rows and columns of C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | Device pointer to the first matrix A_1 on the GPU of dimension (lda, k) when trans is HIPBLAS_OP_N, otherwise of dimension (lda, n) |
| [in] | lda | [int] lda specifies the first dimension of A_i. if trans = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | BP | Device pointer to the first matrix B_1 on the GPU of dimension (ldb, k) when trans is HIPBLAS_OP_N, otherwise of dimension (ldb, n) |
| [in] | ldb | [int] ldb specifies the first dimension of B_i. if trans = HIPBLAS_OP_N, ldb >= max( 1, n ), otherwise ldb >= max( 1, k ). |
| [in] | strideB | [hipblasStride] stride from the start of one matrix (B_i) and the next one (B_i+1) |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | Device pointer to the first matrix C_1 on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| [in,out] | strideC | [hipblasStride] stride from the start of one matrix (C_i) and the next one (C_i+1) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasSsyrStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| float * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
syrStridedBatched performs the matrix-vector operations
A[i] := A[i] + alpha*x[i]*x[i]**T
where alpha is a scalar, vectors, and A is an array of n by n symmetric matrices, for i = 1 , ... , batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] the number of rows and columns of each matrix A. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer to the first vector x_1. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] specifies the pointer increment between vectors (x_i) and (x_i+1). |
| [in,out] | AP | device pointer to the first matrix A_1. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | batchCount | [int] number of instances in the batch |
| const char* hipblasStatusToString | ( | hipblasStatus_t | status | ) |
HIPBLAS Auxiliary API
hipblasStatusToString
Returns string representing hipblasStatus_t value
| [in] | status | [hipblasStatus_t] hipBLAS status to convert to string |
| hipblasStatus_t hipblasStbmv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | k, | ||
| const float * | AP, | ||
| int | lda, | ||
| float * | x, | ||
| int | incx | ||
| ) |
BLAS Level 2 API.
tbmv performs one of the matrix-vector operations
x := A*x or x := A**T*x or x := A**H*x,
x is a vectors and A is a banded m by m matrix (see description below).
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper banded triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower banded triangular matrix. |
| [in] | transA | [hipblasOperation_t] indicates whether matrix A is tranposed (conjugated) or not. |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: The main diagonal of A is assumed to consist of only 1's and is not referenced. HIPBLAS_DIAG_NON_UNIT: No assumptions are made of A's main diagonal. |
| [in] | m | [int] the number of rows and columns of the matrix represented by A. |
| [in] | k | [int] if uplo == HIPBLAS_FILL_MODE_UPPER, k specifies the number of super-diagonals of the matrix A. if uplo == HIPBLAS_FILL_MODE_LOWER, k specifies the number of sub-diagonals of the matrix A. k must satisfy k > 0 && k < lda. |
| [in] | AP | device pointer storing banded triangular matrix A. if uplo == HIPBLAS_FILL_MODE_UPPER: The matrix represented is an upper banded triangular matrix with the main diagonal and k super-diagonals, everything else can be assumed to be 0. The matrix is compacted so that the main diagonal resides on the k'th row, the first super diagonal resides on the RHS of the k-1'th row, etc, with the k'th diagonal on the RHS of the 0'th row. Ex: (HIPBLAS_FILL_MODE_UPPER; m = 5; k = 2) 1 6 9 0 0 0 0 9 8 7 0 2 7 8 0 0 6 7 8 9 0 0 3 8 7 -—> 1 2 3 4 5 0 0 0 4 9 0 0 0 0 0 0 0 0 0 5 0 0 0 0 0 if uplo == HIPBLAS_FILL_MODE_LOWER: The matrix represnted is a lower banded triangular matrix with the main diagonal and k sub-diagonals, everything else can be assumed to be 0. The matrix is compacted so that the main diagonal resides on the 0'th row, working up to the k'th diagonal residing on the LHS of the k'th row. Ex: (HIPBLAS_FILL_MODE_LOWER; m = 5; k = 2) 1 0 0 0 0 1 2 3 4 5 6 2 0 0 0 6 7 8 9 0 9 7 3 0 0 -—> 9 8 7 0 0 0 8 8 4 0 0 0 0 0 0 0 0 7 9 5 0 0 0 0 0 |
| [in] | lda | [int] specifies the leading dimension of A. lda must satisfy lda > k. |
| [in,out] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| hipblasStatus_t hipblasStbmvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | k, | ||
| const float *const | AP[], | ||
| int | lda, | ||
| float *const | x[], | ||
| int | incx, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
tbmvBatched performs one of the matrix-vector operations
x_i := A_i*x_i or x_i := A_i**T*x_i or x_i := A_i**H*x_i,
where (A_i, x_i) is the i-th instance of the batch. x_i is a vector and A_i is an m by m matrix, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: each A_i is an upper banded triangular matrix. HIPBLAS_FILL_MODE_LOWER: each A_i is a lower banded triangular matrix. |
| [in] | transA | [hipblasOperation_t] indicates whether each matrix A_i is tranposed (conjugated) or not. |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: The main diagonal of each A_i is assumed to consist of only 1's and is not referenced. HIPBLAS_DIAG_NON_UNIT: No assumptions are made of each A_i's main diagonal. |
| [in] | m | [int] the number of rows and columns of the matrix represented by each A_i. |
| [in] | k | [int] if uplo == HIPBLAS_FILL_MODE_UPPER, k specifies the number of super-diagonals of each matrix A_i. if uplo == HIPBLAS_FILL_MODE_LOWER, k specifies the number of sub-diagonals of each matrix A_i. k must satisfy k > 0 && k < lda. |
| [in] | AP | device array of device pointers storing each banded triangular matrix A_i. if uplo == HIPBLAS_FILL_MODE_UPPER: The matrix represented is an upper banded triangular matrix with the main diagonal and k super-diagonals, everything else can be assumed to be 0. The matrix is compacted so that the main diagonal resides on the k'th row, the first super diagonal resides on the RHS of the k-1'th row, etc, with the k'th diagonal on the RHS of the 0'th row. Ex: (HIPBLAS_FILL_MODE_UPPER; m = 5; k = 2) 1 6 9 0 0 0 0 9 8 7 0 2 7 8 0 0 6 7 8 9 0 0 3 8 7 -—> 1 2 3 4 5 0 0 0 4 9 0 0 0 0 0 0 0 0 0 5 0 0 0 0 0 if uplo == HIPBLAS_FILL_MODE_LOWER: The matrix represnted is a lower banded triangular matrix with the main diagonal and k sub-diagonals, everything else can be assumed to be 0. The matrix is compacted so that the main diagonal resides on the 0'th row, working up to the k'th diagonal residing on the LHS of the k'th row. Ex: (HIPBLAS_FILL_MODE_LOWER; m = 5; k = 2) 1 0 0 0 0 1 2 3 4 5 6 2 0 0 0 6 7 8 9 0 9 7 3 0 0 -—> 9 8 7 0 0 0 8 8 4 0 0 0 0 0 0 0 0 7 9 5 0 0 0 0 0 |
| [in] | lda | [int] specifies the leading dimension of each A_i. lda must satisfy lda > k. |
| [in,out] | x | device array of device pointer storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasStbmvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | k, | ||
| const float * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| float * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
tbmvStridedBatched performs one of the matrix-vector operations
x_i := A_i*x_i or x_i := A_i**T*x_i or x_i := A_i**H*x_i,
where (A_i, x_i) is the i-th instance of the batch. x_i is a vector and A_i is an m by m matrix, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: each A_i is an upper banded triangular matrix. HIPBLAS_FILL_MODE_LOWER: each A_i is a lower banded triangular matrix. |
| [in] | transA | [hipblasOperation_t] indicates whether each matrix A_i is tranposed (conjugated) or not. |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: The main diagonal of each A_i is assumed to consist of only 1's and is not referenced. HIPBLAS_DIAG_NON_UNIT: No assumptions are made of each A_i's main diagonal. |
| [in] | m | [int] the number of rows and columns of the matrix represented by each A_i. |
| [in] | k | [int] if uplo == HIPBLAS_FILL_MODE_UPPER, k specifies the number of super-diagonals of each matrix A_i. if uplo == HIPBLAS_FILL_MODE_LOWER, k specifies the number of sub-diagonals of each matrix A_i. k must satisfy k > 0 && k < lda. |
| [in] | AP | device array to the first matrix A_i of the batch. Stores each banded triangular matrix A_i. if uplo == HIPBLAS_FILL_MODE_UPPER: The matrix represented is an upper banded triangular matrix with the main diagonal and k super-diagonals, everything else can be assumed to be 0. The matrix is compacted so that the main diagonal resides on the k'th row, the first super diagonal resides on the RHS of the k-1'th row, etc, with the k'th diagonal on the RHS of the 0'th row. Ex: (HIPBLAS_FILL_MODE_UPPER; m = 5; k = 2) 1 6 9 0 0 0 0 9 8 7 0 2 7 8 0 0 6 7 8 9 0 0 3 8 7 -—> 1 2 3 4 5 0 0 0 4 9 0 0 0 0 0 0 0 0 0 5 0 0 0 0 0 if uplo == HIPBLAS_FILL_MODE_LOWER: The matrix represnted is a lower banded triangular matrix with the main diagonal and k sub-diagonals, everything else can be assumed to be 0. The matrix is compacted so that the main diagonal resides on the 0'th row, working up to the k'th diagonal residing on the LHS of the k'th row. Ex: (HIPBLAS_FILL_MODE_LOWER; m = 5; k = 2) 1 0 0 0 0 1 2 3 4 5 6 2 0 0 0 6 7 8 9 0 9 7 3 0 0 -—> 9 8 7 0 0 0 8 8 4 0 0 0 0 0 0 0 0 7 9 5 0 0 0 0 0 |
| [in] | lda | [int] specifies the leading dimension of each A_i. lda must satisfy lda > k. |
| [in] | strideA | [hipblasStride] stride from the start of one A_i matrix to the next A_(i + 1). |
| [in,out] | x | device array to the first vector x_i of the batch. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one x_i matrix to the next x_(i + 1). |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasStbsv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | n, | ||
| int | k, | ||
| const float * | AP, | ||
| int | lda, | ||
| float * | x, | ||
| int | incx | ||
| ) |
BLAS Level 2 API.
tbsv solves
A*x = b or A**T*x = b or A**H*x = b,
where x and b are vectors and A is a banded triangular matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: Solves A*x = b HIPBLAS_OP_T: Solves A**T*x = b HIPBLAS_OP_C: Solves A**H*x = b |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular (i.e. the diagonal elements of A are not used in computations). HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | n | [int] n specifies the number of rows of b. n >= 0. |
| [in] | k | [int] if(uplo == HIPBLAS_FILL_MODE_UPPER) k specifies the number of super-diagonals of A. if(uplo == HIPBLAS_FILL_MODE_LOWER) k specifies the number of sub-diagonals of A. k >= 0. |
| [in] | AP | device pointer storing the matrix A in banded format. |
| [in] | lda | [int] specifies the leading dimension of A. lda >= (k + 1). |
| [in,out] | x | device pointer storing input vector b. Overwritten by the output vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| hipblasStatus_t hipblasStbsvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | n, | ||
| int | k, | ||
| const float *const | AP[], | ||
| int | lda, | ||
| float *const | x[], | ||
| int | incx, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
tbsvBatched solves
A_i*x_i = b_i or A_i**T*x_i = b_i or A_i**H*x_i = b_i,
where x_i and b_i are vectors and A_i is a banded triangular matrix, for i = [1, batchCount].
The input vectors b_i are overwritten by the output vectors x_i.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: Solves A_i*x_i = b_i HIPBLAS_OP_T: Solves A_i**T*x_i = b_i HIPBLAS_OP_C: Solves A_i**H*x_i = b_i |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: each A_i is assumed to be unit triangular (i.e. the diagonal elements of each A_i are not used in computations). HIPBLAS_DIAG_NON_UNIT: each A_i is not assumed to be unit triangular. |
| [in] | n | [int] n specifies the number of rows of each b_i. n >= 0. |
| [in] | k | [int] if(uplo == HIPBLAS_FILL_MODE_UPPER) k specifies the number of super-diagonals of each A_i. if(uplo == HIPBLAS_FILL_MODE_LOWER) k specifies the number of sub-diagonals of each A_i. k >= 0. |
| [in] | AP | device vector of device pointers storing each matrix A_i in banded format. |
| [in] | lda | [int] specifies the leading dimension of each A_i. lda >= (k + 1). |
| [in,out] | x | device vector of device pointers storing each input vector b_i. Overwritten by each output vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasStbsvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | n, | ||
| int | k, | ||
| const float * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| float * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
tbsvStridedBatched solves
A_i*x_i = b_i or A_i**T*x_i = b_i or A_i**H*x_i = b_i,
where x_i and b_i are vectors and A_i is a banded triangular matrix, for i = [1, batchCount].
The input vectors b_i are overwritten by the output vectors x_i.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: Solves A_i*x_i = b_i HIPBLAS_OP_T: Solves A_i**T*x_i = b_i HIPBLAS_OP_C: Solves A_i**H*x_i = b_i |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: each A_i is assumed to be unit triangular (i.e. the diagonal elements of each A_i are not used in computations). HIPBLAS_DIAG_NON_UNIT: each A_i is not assumed to be unit triangular. |
| [in] | n | [int] n specifies the number of rows of each b_i. n >= 0. |
| [in] | k | [int] if(uplo == HIPBLAS_FILL_MODE_UPPER) k specifies the number of super-diagonals of each A_i. if(uplo == HIPBLAS_FILL_MODE_LOWER) k specifies the number of sub-diagonals of each A_i. k >= 0. |
| [in] | AP | device pointer pointing to the first banded matrix A_1. |
| [in] | lda | [int] specifies the leading dimension of each A_i. lda >= (k + 1). |
| [in] | strideA | [hipblasStride] specifies the distance between the start of one matrix (A_i) and the next (A_i+1). |
| [in,out] | x | device pointer pointing to the first input vector b_1. Overwritten by output vectors x. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] specifies the distance between the start of one vector (x_i) and the next (x_i+1). |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasStpmv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const float * | AP, | ||
| float * | x, | ||
| int | incx | ||
| ) |
BLAS Level 2 API.
tpmv performs one of the matrix-vector operations
x = A*x or x = A**T*x,
where x is an n element vector and A is an n by n unit, or non-unit, upper or lower triangular matrix, supplied in the pack form.
The vector x is overwritten.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of A. m >= 0. |
| [in] | AP | device pointer storing matrix A, of dimension at leat ( m * ( m + 1 ) / 2 ). Before entry with uplo = HIPBLAS_FILL_MODE_UPPER, the array A must contain the upper triangular matrix packed sequentially, column by column, so that A[0] contains a_{0,0}, A[1] and A[2] contain a_{0,1} and a_{1, 1} respectively, and so on. Before entry with uplo = HIPBLAS_FILL_MODE_LOWER, the array A must contain the lower triangular matrix packed sequentially, column by column, so that A[0] contains a_{0,0}, A[1] and A[2] contain a_{1,0} and a_{2,0} respectively, and so on. Note that when DIAG = HIPBLAS_DIAG_UNIT, the diagonal elements of A are not referenced, but are assumed to be unity. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. incx must not be zero. |
| hipblasStatus_t hipblasStpmvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const float *const | AP[], | ||
| float *const | x[], | ||
| int | incx, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
tpmvBatched performs one of the matrix-vector operations
x_i = A_i*x_i or x_i = A**T*x_i, 0 \le i < batchCount
where x_i is an n element vector and A_i is an n by n (unit, or non-unit, upper or lower triangular matrix)
The vectors x_i are overwritten.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A_i is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of matrices A_i. m >= 0. |
| [in] | AP | device pointer storing pointer of matrices A_i, of dimension ( lda, m ) |
| [in] | x | device pointer storing vectors x_i. |
| [in] | incx | [int] specifies the increment for the elements of vectors x_i. |
| [in] | batchCount | [int] The number of batched matrices/vectors. |
| hipblasStatus_t hipblasStpmvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const float * | AP, | ||
| hipblasStride | strideA, | ||
| float * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
tpmvStridedBatched performs one of the matrix-vector operations
x_i = A_i*x_i or x_i = A**T*x_i, 0 \le i < batchCount
where x_i is an n element vector and A_i is an n by n (unit, or non-unit, upper or lower triangular matrix) with strides specifying how to retrieve $x_i$ (resp. $A_i$) from $x_{i-1}$ (resp. $A_i$).
The vectors x_i are overwritten.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A_i is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of matrices A_i. m >= 0. |
| [in] | AP | device pointer of the matrix A_0, of dimension ( lda, m ) |
| [in] | strideA | [hipblasStride] stride from the start of one A_i matrix to the next A_{i + 1} |
| [in] | x | device pointer storing the vector x_0. |
| [in] | incx | [int] specifies the increment for the elements of one vector x. |
| [in] | stridex | [hipblasStride] stride from the start of one x_i vector to the next x_{i + 1} |
| [in] | batchCount | [int] The number of batched matrices/vectors. |
| hipblasStatus_t hipblasStpsv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const float * | AP, | ||
| float * | x, | ||
| int | incx | ||
| ) |
BLAS Level 2 API.
tpsv solves
A*x = b or A**T*x = b, or A**H*x = b,
where x and b are vectors and A is a triangular matrix stored in the packed format.
The input vector b is overwritten by the output vector x.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: Solves A*x = b HIPBLAS_OP_T: Solves A**T*x = b HIPBLAS_OP_C: Solves A**H*x = b |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular (i.e. the diagonal elements of A are not used in computations). HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of b. m >= 0. |
| [in] | AP | device pointer storing the packed version of matrix A, of dimension >= (n * (n + 1) / 2) |
| [in,out] | x | device pointer storing vector b on input, overwritten by x on output. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| hipblasStatus_t hipblasStpsvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const float *const | AP[], | ||
| float *const | x[], | ||
| int | incx, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
tpsvBatched solves
A_i*x_i = b_i or A_i**T*x_i = b_i, or A_i**H*x_i = b_i,
where x_i and b_i are vectors and A_i is a triangular matrix stored in the packed format, for i in [1, batchCount].
The input vectors b_i are overwritten by the output vectors x_i.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: each A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: each A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: Solves A*x = b HIPBLAS_OP_T: Solves A**T*x = b HIPBLAS_OP_C: Solves A**H*x = b |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: each A_i is assumed to be unit triangular (i.e. the diagonal elements of each A_i are not used in computations). HIPBLAS_DIAG_NON_UNIT: each A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of each b_i. m >= 0. |
| [in] | AP | device array of device pointers storing the packed versions of each matrix A_i, of dimension >= (n * (n + 1) / 2) |
| [in,out] | x | device array of device pointers storing each input vector b_i, overwritten by x_i on output. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | batchCount | [int] specifies the number of instances in the batch. |
| hipblasStatus_t hipblasStpsvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const float * | AP, | ||
| hipblasStride | strideA, | ||
| float * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
tpsvStridedBatched solves
A_i*x_i = b_i or A_i**T*x_i = b_i, or A_i**H*x_i = b_i,
where x_i and b_i are vectors and A_i is a triangular matrix stored in the packed format, for i in [1, batchCount].
The input vectors b_i are overwritten by the output vectors x_i.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: each A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: each A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: Solves A*x = b HIPBLAS_OP_T: Solves A**T*x = b HIPBLAS_OP_C: Solves A**H*x = b |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: each A_i is assumed to be unit triangular (i.e. the diagonal elements of each A_i are not used in computations). HIPBLAS_DIAG_NON_UNIT: each A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of each b_i. m >= 0. |
| [in] | AP | device pointer pointing to the first packed matrix A_1, of dimension >= (n * (n + 1) / 2) |
| [in] | strideA | [hipblasStride] stride from the beginning of one packed matrix (AP_i) and the next (AP_i+1). |
| [in,out] | x | device pointer pointing to the first input vector b_1. Overwritten by each x_i on output. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the beginning of one vector (x_i) and the next (x_i+1). |
| [in] | batchCount | [int] specifies the number of instances in the batch. |
| hipblasStatus_t hipblasStrmm | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float * | AP, | ||
| int | lda, | ||
| float * | BP, | ||
| int | ldb | ||
| ) |
BLAS Level 3 API.
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasStrmm(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const float* alpha,
const float* AP,
int lda,
float* BP,
int ldb);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasStrmmOutofplace(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const float* alpha,
const float* AP,
int lda,
const float* BP,
int ldb,
float* CP,
int ldc);
#endif
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasDtrmm(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const double* alpha,
const double* AP,
int lda,
double* BP,
int ldb);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasDtrmmOutofplace(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const double* alpha,
const double* AP,
int lda,
const double* BP,
int ldb,
double* CP,
int ldc);
#endif
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasCtrmm(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasComplex* alpha,
const hipblasComplex* AP,
int lda,
hipblasComplex* BP,
int ldb);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasCtrmmOutofplace(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasComplex* alpha,
const hipblasComplex* AP,
int lda,
const hipblasComplex* BP,
int ldb,
hipblasComplex* CP,
int ldc);
#endif
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasZtrmm(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasDoubleComplex* alpha,
const hipblasDoubleComplex* AP,
int lda,
hipblasDoubleComplex* BP,
int ldb);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasZtrmmOutofplace(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasDoubleComplex* alpha,
const hipblasDoubleComplex* AP,
int lda,
const hipblasDoubleComplex* BP,
int ldb,
hipblasDoubleComplex* CP,
int ldc);
#endif
The deprecated Legacy BLAS in-place trmm performs one of the matrix-matrix operations:
B := alpha*op( A )*B, or B := alpha*B*op( A ),
The new trmm performs one of the matrix-matrix operations:
C := alpha*op( A )*B, or C := alpha*B*op( A ),
The in-place functionality is still available in the new trmmm by setting pointer C equal to pointer B, and ldc equal to ldb.
alpha is a scalar, B is an m by n matrix, C is an m by n matrix, A is a unit, or non-unit, upper or lower triangular matrix and op( A ) is one of op( A ) = A or op( A ) = A^T or op( A ) = A^H. When uplo == rocblas_fill_upper the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Here k is m when side == rocblas_side_left and is n when side == rocblas_side_right. When uplo == rocblas_fill_lower the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Here k is m when side == rocblas_side_left and is n when side == rocblas_side_right. Note that when diag == rocblas_diagonal_unit the diagonal elements of A are not referenced either, but are assumed to be unity.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] Specifies whether op(A) multiplies B from the left or right as follows: HIPBLAS_SIDE_LEFT: B := alpha*op( A )*B. HIPBLAS_SIDE_RIGHT: B := alpha*B*op( A ). |
| [in] | uplo | [hipblasFillMode_t] Specifies whether the matrix A is an upper or lower triangular matrix as follows: HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] Specifies the form of op(A) to be used in the matrix multiplication as follows: HIPBLAS_OP_N: op(A) = A. HIPBLAS_OP_T: op(A) = A^T. HIPBLAS_OP_C: op(A) = A^H. |
| [in] | diag | [hipblasDiagType_t] Specifies whether or not A is unit triangular as follows: HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of B. m >= 0. |
| [in] | n | [int] n specifies the number of columns of B. n >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. |
| [in] | AP | Device pointer to matrix A on the GPU. A has dimension ( lda, k ), where k is m when side == HIPBLAS_SIDE_LEFT and is n when side == HIPBLAS_SIDE_RIGHT. |
When uplo == HIPBLAS_FILL_MODE_UPPER the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced.
When uplo == HIPBLAS_FILL_MODE_LOWER the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced.
Note that when diag == HIPBLAS_DIAG_UNIT the diagonal elements of A are not referenced either, but are assumed to be unity.
| [in] | lda | [int] lda specifies the first dimension of A. if side == HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), if side == HIPBLAS_SIDE_RIGHT, lda >= max( 1, n ). |
| [in,out] | BP | Device pointer to the first matrix B_0 on the GPU. On entry, the leading m by n part of the array B must contain the matrix B, and on exit is overwritten by the transformed matrix. |
| [in] | ldb | [int] ldb specifies the first dimension of B. ldb >= max( 1, m ). |
| hipblasStatus_t hipblasStrmmBatched | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float *const | AP[], | ||
| int | lda, | ||
| float *const | BP[], | ||
| int | ldb, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
The hipBLAS trmm_batched API is from Legacy BLAS and it supports only in-place functionality. It is deprecated and it will be replaced with an API that supports both in-place and out-of-place functionality. The new API is available in hipBLAS versions 1.x.x and later. To get the new API compile with the directive -DHIPBLAS_V1.
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasStrmmBatched(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const float* alpha,
const float* const AP[],
int lda,
float* const BP[],
int ldb,
int batchCount);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasStrmmBatchedOutofplace(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const float* alpha,
const float* const AP[],
int lda,
const float* const BP[],
int ldb,
float* const CP[],
int ldc,
int batchCount);
#endif
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasDtrmmBatched(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const double* alpha,
const double* const AP[],
int lda,
double* const BP[],
int ldb,
int batchCount);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasDtrmmBatchedOutofplace(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const double* alpha,
const double* const AP[],
int lda,
const double* const BP[],
int ldb,
double* const CP[],
int ldc,
int batchCount);
#endif
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasCtrmmBatched(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasComplex* alpha,
const hipblasComplex* const AP[],
int lda,
hipblasComplex* const BP[],
int ldb,
int batchCount);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasCtrmmBatchedOutofplace(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasComplex* alpha,
const hipblasComplex* const AP[],
int lda,
const hipblasComplex* const BP[],
int ldb,
hipblasComplex* const CP[],
int ldc,
int batchCount);
#endif
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasZtrmmBatched(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasDoubleComplex* alpha,
const hipblasDoubleComplex* const AP[],
int lda,
hipblasDoubleComplex* const BP[],
int ldb,
int batchCount);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasZtrmmBatchedOutofplace(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasDoubleComplex* alpha,
const hipblasDoubleComplex* const AP[],
int lda,
const hipblasDoubleComplex* const BP[],
int ldb,
hipblasDoubleComplex* const CP[],
int ldc,
int batchCount);
#endif
The deprecated Legacy BLAS in-place trmm_batched performs one of the batched matrix-matrix operations:
B_i := alpha*op( A_i )*B_i, or B_i := alpha*B_i*op( A_i ) for i = 0, 1, ... batch_count -1,
The new trmm_batched performs one of the matrix-matrix operations:
C_i := alpha*op( A_i )*B_i, or C_i := alpha*B_i*op( A_i ) for i = 0, 1, ... batch_count -1,
The in-place functionality is still available in the new trmmm_batched by setting pointer C equal to pointer B and ldc equal to ldb.
alpha is a scalar, B_i is an m by n matrix, C_i is an m by n matrix, A_i is a unit, or non-unit, upper or lower triangular matrix and op( A_i ) is one of op( A_i ) = A_i or op( A_i ) = A_i^T or op( A_i ) = A_i^H.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] Specifies whether op(A_i) multiplies B_i from the left or right as follows: HIPBLAS_SIDE_LEFT: B_i := alpha*op( A_i )*B_i. HIPBLAS_SIDE_RIGHT: B_i := alpha*B_i*op( A_i ). |
| [in] | uplo | [hipblasFillMode_t] Specifies whether the matrix A is an upper or lower triangular matrix as follows: HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] Specifies the form of op(A_i) to be used in the matrix multiplication as follows: HIPBLAS_OP_N: op(A_i) = A_i. HIPBLAS_OP_T: op(A_i) = A_i^T. HIPBLAS_OP_C: op(A_i) = A_i^H. |
| [in] | diag | [hipblasDiagType_t] Specifies whether or not A_i is unit triangular as follows: HIPBLAS_DIAG_UNIT: A_i is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of B_i. m >= 0. |
| [in] | n | [int] n specifies the number of columns of B_i. n >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A_i is not referenced and B_i need not be set before entry. |
| [in] | AP | Device array of device pointers storing each matrix A_i on the GPU. Each A_i is of dimension ( lda, k ), where k is m when side == HIPBLAS_SIDE_LEFT and is n when side == HIPBLAS_SIDE_RIGHT. |
When uplo == HIPBLAS_FILL_MODE_UPPER the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced.
When uplo == HIPBLAS_FILL_MODE_LOWER the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced.
Note that when diag == HIPBLAS_DIAG_UNIT the diagonal elements of A_i are not referenced either, but are assumed to be unity.
| [in] | lda | [int] lda specifies the first dimension of A. if side == HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), if side == HIPBLAS_SIDE_RIGHT, lda >= max( 1, n ). |
| [in,out] | BP | device array of device pointers storing each matrix B_i on the GPU. On entry, the leading m by n part of the array B_i must contain the matrix B_i, and on exit is overwritten by the transformed matrix. |
| [in] | ldb | [int] ldb specifies the first dimension of B_i. ldb >= max( 1, m ). |
| [in] | batchCount | [int] number of instances i in the batch. |
| hipblasStatus_t hipblasStrmmBatchedOutofplace | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float *const | AP[], | ||
| int | lda, | ||
| const float *const | BP[], | ||
| int | ldb, | ||
| float *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
| hipblasStatus_t hipblasStrmmOutofplace | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float * | AP, | ||
| int | lda, | ||
| const float * | BP, | ||
| int | ldb, | ||
| float * | CP, | ||
| int | ldc | ||
| ) |
| hipblasStatus_t hipblasStrmmStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| float * | BP, | ||
| int | ldb, | ||
| hipblasStride | strideB, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
The hipBLAS trmm_strided_batched API is from Legacy BLAS and it supports only in-place functionality. It is deprecated and it will be replaced with an API that supports both in-place and out-of-place functionality. The new API is available in hipBLAS versions 1.x.x and later. To get the new API compile with the directive -DHIPBLAS_V1.
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasStrmmStridedBatched(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const float* alpha,
const float* AP,
int lda,
hipblasStride strideA,
float* BP,
int ldb,
hipblasStride strideB,
int batchCount);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasStrmmStridedBatchedOutofplace(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const float* alpha,
const float* AP,
int lda,
hipblasStride strideA,
const float* BP,
int ldb,
hipblasStride strideB,
float* CP,
int ldc,
hipblasStride strideC,
int batchCount);
#endif
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasDtrmmStridedBatched(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const double* alpha,
const double* AP,
int lda,
hipblasStride strideA,
double* BP,
int ldb,
hipblasStride strideB,
int batchCount);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasDtrmmStridedBatchedOutofplace(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const double* alpha,
const double* AP,
int lda,
hipblasStride strideA,
const double* BP,
int ldb,
hipblasStride strideB,
double* CP,
int ldc,
hipblasStride strideC,
int batchCount);
#endif
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasCtrmmStridedBatched(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasComplex* alpha,
const hipblasComplex* AP,
int lda,
hipblasStride strideA,
hipblasComplex* BP,
int ldb,
hipblasStride strideB,
int batchCount);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasCtrmmStridedBatchedOutofplace(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasComplex* alpha,
const hipblasComplex* AP,
int lda,
hipblasStride strideA,
const hipblasComplex* BP,
int ldb,
hipblasStride strideB,
hipblasComplex* CP,
int ldc,
hipblasStride strideC,
int batchCount);
#endif
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasZtrmmStridedBatched(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasDoubleComplex* alpha,
const hipblasDoubleComplex* AP,
int lda,
hipblasStride strideA,
hipblasDoubleComplex* BP,
int ldb,
hipblasStride strideB,
int batchCount);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasZtrmmStridedBatchedOutofplace(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasDoubleComplex* alpha,
const hipblasDoubleComplex* AP,
int lda,
hipblasStride strideA,
const hipblasDoubleComplex* BP,
int ldb,
hipblasStride strideB,
hipblasDoubleComplex* BC,
int ldc,
hipblasStride strideC,
int batchCount);
#endif
The deprecated Legacy BLAS in-place trmm_strided_batched performs one of the strided_batched matrix-matrix operations:
B_i := alpha*op( A_i )*B_i, or B_i := alpha*B_i*op( A_i ) for i = 0, 1, ... batch_count -1,
The new trmm_batched performs one of the matrix-matrix operations:
C_i := alpha*op( A_i )*B_i, or C_i := alpha*B_i*op( A_i ) for i = 0, 1, ... batch_count -1,
The in-place functionality is still available in the new trmmm_batched by setting pointer C equal to pointer B, setting ldc equal to ldb, and setting stride_C equal to stride_B.
alpha is a scalar, B_i is an m by n matrix, C_i is an m by n matrix, A_i is a unit, or non-unit, upper or lower triangular matrix and op( A_i ) is one of op( A_i ) = A_i or op( A_i ) = A_i^T or op( A_i ) = A_i^H.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] Specifies whether op(A_i) multiplies B_i from the left or right as follows: HIPBLAS_SIDE_LEFT: B_i := alpha*op( A_i )*B_i. HIPBLAS_SIDE_RIGHT: B_i := alpha*B_i*op( A_i ). |
| [in] | uplo | [hipblasFillMode_t] Specifies whether the matrix A is an upper or lower triangular matrix as follows: HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] Specifies the form of op(A_i) to be used in the matrix multiplication as follows: HIPBLAS_OP_N: op(A_i) = A_i. HIPBLAS_OP_T: op(A_i) = A_i^T. HIPBLAS_OP_C: op(A_i) = A_i^H. |
| [in] | diag | [hipblasDiagType_t] Specifies whether or not A_i is unit triangular as follows: HIPBLAS_DIAG_UNIT: A_i is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of B_i. m >= 0. |
| [in] | n | [int] n specifies the number of columns of B_i. n >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A_i is not referenced and B_i need not be set before entry. |
| [in] | AP | Device pointer to the first matrix A_0 on the GPU. Each A_i is of dimension ( lda, k ), where k is m when side == HIPBLAS_SIDE_LEFT and is n when side == HIPBLAS_SIDE_RIGHT. |
When uplo == HIPBLAS_FILL_MODE_UPPER the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced.
When uplo == HIPBLAS_FILL_MODE_LOWER the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced.
Note that when diag == HIPBLAS_DIAG_UNIT the diagonal elements of A_i are not referenced either, but are assumed to be unity.
| [in] | lda | [int] lda specifies the first dimension of A. if side == HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), if side == HIPBLAS_SIDE_RIGHT, lda >= max( 1, n ). |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in,out] | BP | Device pointer to the first matrix B_0 on the GPU. On entry, the leading m by n part of the array B_i must contain the matrix B_i, and on exit is overwritten by the transformed matrix. |
| [in] | ldb | [int] ldb specifies the first dimension of B_i. ldb >= max( 1, m ). |
| [in] | strideB | [hipblasStride] stride from the start of one matrix (B_i) and the next one (B_i+1) |
| [in] | batchCount | [int] number of instances i in the batch. |
| hipblasStatus_t hipblasStrmmStridedBatchedOutofplace | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const float * | BP, | ||
| int | ldb, | ||
| hipblasStride | strideB, | ||
| float * | CP, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount | ||
| ) |
| hipblasStatus_t hipblasStrmv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const float * | AP, | ||
| int | lda, | ||
| float * | x, | ||
| int | incx | ||
| ) |
BLAS Level 2 API.
trmv performs one of the matrix-vector operations
x = A*x or x = A**T*x,
where x is an n element vector and A is an n by n unit, or non-unit, upper or lower triangular matrix.
The vector x is overwritten.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of A. m >= 0. |
| [in] | AP | device pointer storing matrix A, of dimension ( lda, m ) |
| [in] | lda | [int] specifies the leading dimension of A. lda = max( 1, m ). |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| hipblasStatus_t hipblasStrmvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const float *const | AP[], | ||
| int | lda, | ||
| float *const | x[], | ||
| int | incx, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
trmvBatched performs one of the matrix-vector operations
x_i = A_i*x_i or x_i = A**T*x_i, 0 \le i < batchCount
where x_i is an n element vector and A_i is an n by n (unit, or non-unit, upper or lower triangular matrix)
The vectors x_i are overwritten.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A_i is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of matrices A_i. m >= 0. |
| [in] | AP | device pointer storing pointer of matrices A_i, of dimension ( lda, m ) |
| [in] | lda | [int] specifies the leading dimension of A_i. lda >= max( 1, m ). |
| [in] | x | device pointer storing vectors x_i. |
| [in] | incx | [int] specifies the increment for the elements of vectors x_i. |
| [in] | batchCount | [int] The number of batched matrices/vectors. |
| hipblasStatus_t hipblasStrmvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const float * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| float * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
trmvStridedBatched performs one of the matrix-vector operations
x_i = A_i*x_i or x_i = A**T*x_i, 0 \le i < batchCount
where x_i is an n element vector and A_i is an n by n (unit, or non-unit, upper or lower triangular matrix) with strides specifying how to retrieve $x_i$ (resp. $A_i$) from $x_{i-1}$ (resp. $A_i$).
The vectors x_i are overwritten.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A_i is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of matrices A_i. m >= 0. |
| [in] | AP | device pointer of the matrix A_0, of dimension ( lda, m ) |
| [in] | lda | [int] specifies the leading dimension of A_i. lda >= max( 1, m ). |
| [in] | strideA | [hipblasStride] stride from the start of one A_i matrix to the next A_{i + 1} |
| [in] | x | device pointer storing the vector x_0. |
| [in] | incx | [int] specifies the increment for the elements of one vector x. |
| [in] | stridex | [hipblasStride] stride from the start of one x_i vector to the next x_{i + 1} |
| [in] | batchCount | [int] The number of batched matrices/vectors. |
| hipblasStatus_t hipblasStrsm | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float * | AP, | ||
| int | lda, | ||
| float * | BP, | ||
| int | ldb | ||
| ) |
BLAS Level 3 API.
trsm solves
op(A)*X = alpha*B or X*op(A) = alpha*B,
where alpha is a scalar, X and B are m by n matrices, A is triangular matrix and op(A) is one of
op( A ) = A or op( A ) = A^T or op( A ) = A^H.
The matrix X is overwritten on B.
Note about memory allocation: When trsm is launched with a k evenly divisible by the internal block size of 128, and is no larger than 10 of these blocks, the API takes advantage of utilizing pre-allocated memory found in the handle to increase overall performance. This memory can be managed by using the environment variable WORKBUF_TRSM_B_CHNK. When this variable is not set the device memory used for temporary storage will default to 1 MB and may result in chunking, which in turn may reduce performance. Under these circumstances it is recommended that WORKBUF_TRSM_B_CHNK be set to the desired chunk of right hand sides to be used at a time.
(where k is m when HIPBLAS_SIDE_LEFT and is n when HIPBLAS_SIDE_RIGHT)
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] HIPBLAS_SIDE_LEFT: op(A)*X = alpha*B. HIPBLAS_SIDE_RIGHT: X*op(A) = alpha*B. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: op(A) = A. HIPBLAS_OP_T: op(A) = A^T. HIPBLAS_OP_C: op(A) = A^H. |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of B. m >= 0. |
| [in] | n | [int] n specifies the number of columns of B. n >= 0. |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. When alpha is &zero then A is not referenced and B need not be set before entry. |
| [in] | AP | device pointer storing matrix A. of dimension ( lda, k ), where k is m when HIPBLAS_SIDE_LEFT and is n when HIPBLAS_SIDE_RIGHT only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of A. if side = HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), if side = HIPBLAS_SIDE_RIGHT, lda >= max( 1, n ). |
| [in,out] | BP | device pointer storing matrix B. |
| [in] | ldb | [int] ldb specifies the first dimension of B. ldb >= max( 1, m ). |
| hipblasStatus_t hipblasStrsmBatched | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float *const | AP[], | ||
| int | lda, | ||
| float *const | BP[], | ||
| int | ldb, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
trsmBatched performs the following batched operation:
op(A_i)*X_i = alpha*B_i or X_i*op(A_i) = alpha*B_i, for i = 1, ..., batchCount.
where alpha is a scalar, X and B are batched m by n matrices, A is triangular batched matrix and op(A) is one of
op( A ) = A or op( A ) = A^T or op( A ) = A^H.
Each matrix X_i is overwritten on B_i for i = 1, ..., batchCount.
Note about memory allocation: When trsm is launched with a k evenly divisible by the internal block size of 128, and is no larger than 10 of these blocks, the API takes advantage of utilizing pre-allocated memory found in the handle to increase overall performance. This memory can be managed by using the environment variable WORKBUF_TRSM_B_CHNK. When this variable is not set the device memory used for temporary storage will default to 1 MB and may result in chunking, which in turn may reduce performance. Under these circumstances it is recommended that WORKBUF_TRSM_B_CHNK be set to the desired chunk of right hand sides to be used at a time. (where k is m when HIPBLAS_SIDE_LEFT and is n when HIPBLAS_SIDE_RIGHT)
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] HIPBLAS_SIDE_LEFT: op(A)*X = alpha*B. HIPBLAS_SIDE_RIGHT: X*op(A) = alpha*B. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: each A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: each A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: op(A) = A. HIPBLAS_OP_T: op(A) = A^T. HIPBLAS_OP_C: op(A) = A^H. |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: each A_i is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: each A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of each B_i. m >= 0. |
| [in] | n | [int] n specifies the number of columns of each B_i. n >= 0. |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. When alpha is &zero then A is not referenced and B need not be set before entry. |
| [in] | AP | device array of device pointers storing each matrix A_i on the GPU. Matricies are of dimension ( lda, k ), where k is m when HIPBLAS_SIDE_LEFT and is n when HIPBLAS_SIDE_RIGHT only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of each A_i. if side = HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), if side = HIPBLAS_SIDE_RIGHT, lda >= max( 1, n ). |
| [in,out] | BP | device array of device pointers storing each matrix B_i on the GPU. |
| [in] | ldb | [int] ldb specifies the first dimension of each B_i. ldb >= max( 1, m ). |
| [in] | batchCount | [int] number of trsm operatons in the batch. |
| hipblasStatus_t hipblasStrsmStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const float * | alpha, | ||
| const float * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| float * | BP, | ||
| int | ldb, | ||
| hipblasStride | strideB, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
trsmSridedBatched performs the following strided batched operation:
op(A_i)*X_i = alpha*B_i or X_i*op(A_i) = alpha*B_i, for i = 1, ..., batchCount.
where alpha is a scalar, X and B are strided batched m by n matrices, A is triangular strided batched matrix and op(A) is one of
op( A ) = A or op( A ) = A^T or op( A ) = A^H.
Each matrix X_i is overwritten on B_i for i = 1, ..., batchCount.
Note about memory allocation: When trsm is launched with a k evenly divisible by the internal block size of 128, and is no larger than 10 of these blocks, the API takes advantage of utilizing pre-allocated memory found in the handle to increase overall performance. This memory can be managed by using the environment variable WORKBUF_TRSM_B_CHNK. When this variable is not set the device memory used for temporary storage will default to 1 MB and may result in chunking, which in turn may reduce performance. Under these circumstances it is recommended that WORKBUF_TRSM_B_CHNK be set to the desired chunk of right hand sides to be used at a time. (where k is m when HIPBLAS_SIDE_LEFT and is n when HIPBLAS_SIDE_RIGHT)
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] HIPBLAS_SIDE_LEFT: op(A)*X = alpha*B. HIPBLAS_SIDE_RIGHT: X*op(A) = alpha*B. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: each A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: each A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: op(A) = A. HIPBLAS_OP_T: op(A) = A^T. HIPBLAS_OP_C: op(A) = A^H. |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: each A_i is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: each A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of each B_i. m >= 0. |
| [in] | n | [int] n specifies the number of columns of each B_i. n >= 0. |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. When alpha is &zero then A is not referenced and B need not be set before entry. |
| [in] | AP | device pointer pointing to the first matrix A_1. of dimension ( lda, k ), where k is m when HIPBLAS_SIDE_LEFT and is n when HIPBLAS_SIDE_RIGHT only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of each A_i. if side = HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), if side = HIPBLAS_SIDE_RIGHT, lda >= max( 1, n ). |
| [in] | strideA | [hipblasStride] stride from the start of one A_i matrix to the next A_(i + 1). |
| [in,out] | BP | device pointer pointing to the first matrix B_1. |
| [in] | ldb | [int] ldb specifies the first dimension of each B_i. ldb >= max( 1, m ). |
| [in] | strideB | [hipblasStride] stride from the start of one B_i matrix to the next B_(i + 1). |
| [in] | batchCount | [int] number of trsm operatons in the batch. |
| hipblasStatus_t hipblasStrsv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const float * | AP, | ||
| int | lda, | ||
| float * | x, | ||
| int | incx | ||
| ) |
BLAS Level 2 API.
trsv solves
A*x = b or A**T*x = b,
where x and b are vectors and A is a triangular matrix.
The vector x is overwritten on b.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of b. m >= 0. |
| [in] | AP | device pointer storing matrix A, of dimension ( lda, m ) |
| [in] | lda | [int] specifies the leading dimension of A. lda = max( 1, m ). |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| hipblasStatus_t hipblasStrsvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const float *const | AP[], | ||
| int | lda, | ||
| float *const | x[], | ||
| int | incx, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
trsvBatched solves
A_i*x_i = b_i or A_i**T*x_i = b_i,
where (A_i, x_i, b_i) is the i-th instance of the batch. x_i and b_i are vectors and A_i is an m by m triangular matrix.
The vector x is overwritten on b.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of b. m >= 0. |
| [in] | AP | device array of device pointers storing each matrix A_i. |
| [in] | lda | [int] specifies the leading dimension of each A_i. lda = max(1, m) |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasStrsvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const float * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| float * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
trsvStridedBatched solves
A_i*x_i = b_i or A_i**T*x_i = b_i,
where (A_i, x_i, b_i) is the i-th instance of the batch. x_i and b_i are vectors and A_i is an m by m triangular matrix, for i = 1, ..., batchCount.
The vector x is overwritten on b.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of each b_i. m >= 0. |
| [in] | AP | device pointer to the first matrix (A_1) in the batch, of dimension ( lda, m ) |
| [in] | strideA | [hipblasStride] stride from the start of one A_i matrix to the next A_(i + 1) |
| [in] | lda | [int] specifies the leading dimension of each A_i. lda = max( 1, m ). |
| [in,out] | x | device pointer to the first vector (x_1) in the batch. |
| [in] | stridex | [hipblasStride] stride from the start of one x_i vector to the next x_(i + 1) |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasStrtri | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasDiagType_t | diag, | ||
| int | n, | ||
| const float * | AP, | ||
| int | lda, | ||
| float * | invA, | ||
| int | ldinvA | ||
| ) |
BLAS Level 3 API.
trtri compute the inverse of a matrix A, namely, invA
and write the result into invA;
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | diag | [hipblasDiagType_t] = 'HIPBLAS_DIAG_NON_UNIT', A is non-unit triangular; = 'HIPBLAS_DIAG_UNIT', A is unit triangular; |
| [in] | n | [int] size of matrix A and invA |
| [in] | AP | device pointer storing matrix A. |
| [in] | lda | [int] specifies the leading dimension of A. |
| [out] | invA | device pointer storing matrix invA. |
| [in] | ldinvA | [int] specifies the leading dimension of invA. |
| hipblasStatus_t hipblasStrtriBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasDiagType_t | diag, | ||
| int | n, | ||
| const float *const | AP[], | ||
| int | lda, | ||
| float * | invA[], | ||
| int | ldinvA, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
trtriBatched compute the inverse of A_i and write into invA_i where A_i and invA_i are the i-th matrices in the batch, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' |
| [in] | diag | [hipblasDiagType_t] = 'HIPBLAS_DIAG_NON_UNIT', A is non-unit triangular; = 'HIPBLAS_DIAG_UNIT', A is unit triangular; |
| [in] | n | [int] |
| [in] | AP | device array of device pointers storing each matrix A_i. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [out] | invA | device array of device pointers storing the inverse of each matrix A_i. Partial inplace operation is supported, see below. If UPLO = 'U', the leading N-by-N upper triangular part of the invA will store the inverse of the upper triangular matrix, and the strictly lower triangular part of invA is cleared. If UPLO = 'L', the leading N-by-N lower triangular part of the invA will store the inverse of the lower triangular matrix, and the strictly upper triangular part of invA is cleared. |
| [in] | ldinvA | [int] specifies the leading dimension of each invA_i. |
| [in] | batchCount | [int] numbers of matrices in the batch |
| hipblasStatus_t hipblasStrtriStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasDiagType_t | diag, | ||
| int | n, | ||
| const float * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| float * | invA, | ||
| int | ldinvA, | ||
| hipblasStride | stride_invA, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
trtriStridedBatched compute the inverse of A_i and write into invA_i where A_i and invA_i are the i-th matrices in the batch, for i = 1, ..., batchCount
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' |
| [in] | diag | [hipblasDiagType_t] = 'HIPBLAS_DIAG_NON_UNIT', A is non-unit triangular; = 'HIPBLAS_DIAG_UNIT', A is unit triangular; |
| [in] | n | [int] |
| [in] | AP | device pointer pointing to address of first matrix A_1. |
| [in] | lda | [int] specifies the leading dimension of each A. |
| [in] | strideA | [hipblasStride] "batch stride a": stride from the start of one A_i matrix to the next A_(i + 1). |
| [out] | invA | device pointer storing the inverses of each matrix A_i. Partial inplace operation is supported, see below. If UPLO = 'U', the leading N-by-N upper triangular part of the invA will store the inverse of the upper triangular matrix, and the strictly lower triangular part of invA is cleared. If UPLO = 'L', the leading N-by-N lower triangular part of the invA will store the inverse of the lower triangular matrix, and the strictly upper triangular part of invA is cleared. |
| [in] | ldinvA | [int] specifies the leading dimension of each invA_i. |
| [in] | stride_invA | [hipblasStride] "batch stride invA": stride from the start of one invA_i matrix to the next invA_(i + 1). |
| [in] | batchCount | [int] numbers of matrices in the batch |
| hipblasStatus_t hipblasTrsmBatchedEx | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const void * | alpha, | ||
| void * | A, | ||
| int | lda, | ||
| void * | B, | ||
| int | ldb, | ||
| int | batchCount, | ||
| const void * | invA, | ||
| int | invAsize, | ||
| hipblasDatatype_t | computeType | ||
| ) |
BLAS EX API
trsmBatchedEx solves
op(A_i)*X_i = alpha*B_i or X_i*op(A_i) = alpha*B_i,
for i = 1, ..., batchCount; and where alpha is a scalar, X and B are arrays of m by n matrices, A is an array of triangular matrix and each op(A_i) is one of
op( A_i ) = A_i or op( A_i ) = A_i^T or op( A_i ) = A_i^H.
Each matrix X_i is overwritten on B_i.
This function gives the user the ability to reuse the invA matrix between runs. If invA == NULL, hipblasTrsmBatchedEx will automatically calculate each invA_i on every run.
Setting up invA: Each accepted invA_i matrix consists of the packed 128x128 inverses of the diagonal blocks of matrix A_i, followed by any smaller diagonal block that remains. To set up each invA_i it is recommended that hipblasTrtriBatched be used with matrix A_i as the input. invA is an array of pointers of batchCount length holding each invA_i.
Device memory of size 128 x k should be allocated for each invA_i ahead of time, where k is m when HIPBLAS_SIDE_LEFT and is n when HIPBLAS_SIDE_RIGHT. The actual number of elements in each invA_i should be passed as invAsize.
To begin, hipblasTrtriBatched must be called on the full 128x128 sized diagonal blocks of each matrix A_i. Below are the restricted parameters:
Then any remaining block may be added:
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] HIPBLAS_SIDE_LEFT: op(A)*X = alpha*B. HIPBLAS_SIDE_RIGHT: X*op(A) = alpha*B. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: each A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: each A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: op(A) = A. HIPBLAS_OP_T: op(A) = A^T. HIPBLAS_OP_C: op(A) = A^H. |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: each A_i is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: each A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of each B_i. m >= 0. |
| [in] | n | [int] n specifies the number of columns of each B_i. n >= 0. |
| [in] | alpha | [void *] device pointer or host pointer alpha specifying the scalar alpha. When alpha is &zero then A is not referenced, and B need not be set before entry. |
| [in] | A | [void *] device array of device pointers storing each matrix A_i. each A_i is of dimension ( lda, k ), where k is m when HIPBLAS_SIDE_LEFT and is n when HIPBLAS_SIDE_RIGHT only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of each A_i. if side = HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), if side = HIPBLAS_SIDE_RIGHT, lda >= max( 1, n ). |
| [in,out] | B | [void *] device array of device pointers storing each matrix B_i. each B_i is of dimension ( ldb, n ). Before entry, the leading m by n part of the array B_i must contain the right-hand side matrix B_i, and on exit is overwritten by the solution matrix X_i |
| [in] | ldb | [int] ldb specifies the first dimension of each B_i. ldb >= max( 1, m ). |
| [in] | batchCount | [int] specifies how many batches. |
| [in] | invA | [void *] device array of device pointers storing the inverse diagonal blocks of each A_i. each invA_i is of dimension ( ld_invA, k ), where k is m when HIPBLAS_SIDE_LEFT and is n when HIPBLAS_SIDE_RIGHT. ld_invA must be equal to 128. |
| [in] | invAsize | [int] invAsize specifies the number of elements of device memory in each invA_i. |
| [in] | computeType | [hipblasDatatype_t] specifies the datatype of computation |
| hipblasStatus_t hipblasTrsmEx | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const void * | alpha, | ||
| void * | A, | ||
| int | lda, | ||
| void * | B, | ||
| int | ldb, | ||
| const void * | invA, | ||
| int | invAsize, | ||
| hipblasDatatype_t | computeType | ||
| ) |
BLAS EX API
trsmEx solves
op(A)*X = alpha*B or X*op(A) = alpha*B,
where alpha is a scalar, X and B are m by n matrices, A is triangular matrix and op(A) is one of
op( A ) = A or op( A ) = A^T or op( A ) = A^H.
The matrix X is overwritten on B.
This function gives the user the ability to reuse the invA matrix between runs. If invA == NULL, hipblasTrsmEx will automatically calculate invA on every run.
Setting up invA: The accepted invA matrix consists of the packed 128x128 inverses of the diagonal blocks of matrix A, followed by any smaller diagonal block that remains. To set up invA it is recommended that hipblasTrtriBatched be used with matrix A as the input.
Device memory of size 128 x k should be allocated for invA ahead of time, where k is m when HIPBLAS_SIDE_LEFT and is n when HIPBLAS_SIDE_RIGHT. The actual number of elements in invA should be passed as invAsize.
To begin, hipblasTrtriBatched must be called on the full 128x128 sized diagonal blocks of matrix A. Below are the restricted parameters:
Then any remaining block may be added:
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] HIPBLAS_SIDE_LEFT: op(A)*X = alpha*B. HIPBLAS_SIDE_RIGHT: X*op(A) = alpha*B. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: op(A) = A. HIPBLAS_OP_T: op(A) = A^T. HIPBLAS_ON_C: op(A) = A^H. |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of B. m >= 0. |
| [in] | n | [int] n specifies the number of columns of B. n >= 0. |
| [in] | alpha | [void *] device pointer or host pointer specifying the scalar alpha. When alpha is &zero then A is not referenced, and B need not be set before entry. |
| [in] | A | [void *] device pointer storing matrix A. of dimension ( lda, k ), where k is m when HIPBLAS_SIDE_LEFT and is n when HIPBLAS_SIDE_RIGHT only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of A. if side = HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), if side = HIPBLAS_SIDE_RIGHT, lda >= max( 1, n ). |
| [in,out] | B | [void *] device pointer storing matrix B. B is of dimension ( ldb, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B, and on exit is overwritten by the solution matrix X. |
| [in] | ldb | [int] ldb specifies the first dimension of B. ldb >= max( 1, m ). |
| [in] | invA | [void *] device pointer storing the inverse diagonal blocks of A. invA is of dimension ( ld_invA, k ), where k is m when HIPBLAS_SIDE_LEFT and is n when HIPBLAS_SIDE_RIGHT. ld_invA must be equal to 128. |
| [in] | invAsize | [int] invAsize specifies the number of elements of device memory in invA. |
| [in] | computeType | [hipblasDatatype_t] specifies the datatype of computation |
| hipblasStatus_t hipblasTrsmStridedBatchedEx | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const void * | alpha, | ||
| void * | A, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| void * | B, | ||
| int | ldb, | ||
| hipblasStride | strideB, | ||
| int | batchCount, | ||
| const void * | invA, | ||
| int | invAsize, | ||
| hipblasStride | strideInvA, | ||
| hipblasDatatype_t | computeType | ||
| ) |
BLAS EX API
trsmStridedBatchedEx solves
op(A_i)*X_i = alpha*B_i or X_i*op(A_i) = alpha*B_i,
for i = 1, ..., batchCount; and where alpha is a scalar, X and B are strided batched m by n matrices, A is a strided batched triangular matrix and op(A_i) is one of
op( A_i ) = A_i or op( A_i ) = A_i^T or op( A_i ) = A_i^H.
Each matrix X_i is overwritten on B_i.
This function gives the user the ability to reuse each invA_i matrix between runs. If invA == NULL, hipblasTrsmStridedBatchedEx will automatically calculate each invA_i on every run.
Setting up invA: Each accepted invA_i matrix consists of the packed 128x128 inverses of the diagonal blocks of matrix A_i, followed by any smaller diagonal block that remains. To set up invA_i it is recommended that hipblasTrtriBatched be used with matrix A_i as the input. invA is a contiguous piece of memory holding each invA_i.
Device memory of size 128 x k should be allocated for each invA_i ahead of time, where k is m when HIPBLAS_SIDE_LEFT and is n when HIPBLAS_SIDE_RIGHT. The actual number of elements in each invA_i should be passed as invAsize.
To begin, hipblasTrtriBatched must be called on the full 128x128 sized diagonal blocks of each matrix A_i. Below are the restricted parameters:
Then any remaining block may be added:
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] HIPBLAS_SIDE_LEFT: op(A)*X = alpha*B. HIPBLAS_SIDE_RIGHT: X*op(A) = alpha*B. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: each A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: each A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: op(A) = A. HIPBLAS_OP_T: op(A) = A^T. HIPBLAS_OP_C: op(A) = A^H. |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: each A_i is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: each A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of each B_i. m >= 0. |
| [in] | n | [int] n specifies the number of columns of each B_i. n >= 0. |
| [in] | alpha | [void *] device pointer or host pointer specifying the scalar alpha. When alpha is &zero then A is not referenced, and B need not be set before entry. |
| [in] | A | [void *] device pointer storing matrix A. of dimension ( lda, k ), where k is m when HIPBLAS_SIDE_LEFT and is n when HIPBLAS_SIDE_RIGHT only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of A. if side = HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), if side = HIPBLAS_SIDE_RIGHT, lda >= max( 1, n ). |
| [in] | strideA | [hipblasStride] The stride between each A matrix. |
| [in,out] | B | [void *] device pointer pointing to first matrix B_i. each B_i is of dimension ( ldb, n ). Before entry, the leading m by n part of each array B_i must contain the right-hand side of matrix B_i, and on exit is overwritten by the solution matrix X_i. |
| [in] | ldb | [int] ldb specifies the first dimension of each B_i. ldb >= max( 1, m ). |
| [in] | strideB | [hipblasStride] The stride between each B_i matrix. |
| [in] | batchCount | [int] specifies how many batches. |
| [in] | invA | [void *] device pointer storing the inverse diagonal blocks of each A_i. invA points to the first invA_1. each invA_i is of dimension ( ld_invA, k ), where k is m when HIPBLAS_SIDE_LEFT and is n when HIPBLAS_SIDE_RIGHT. ld_invA must be equal to 128. |
| [in] | invAsize | [int] invAsize specifies the number of elements of device memory in each invA_i. |
| [in] | strideInvA | [hipblasStride] The stride between each invA matrix. |
| [in] | computeType | [hipblasDatatype_t] specifies the datatype of computation |
| hipblasStatus_t hipblasZaxpy | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasDoubleComplex * | y, | ||
| int | incy | ||
| ) |
BLAS Level 1 API.
axpy computes constant alpha multiplied by vector x, plus vector y
y := alpha * x + y
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x and y. |
| [in] | alpha | device pointer or host pointer to specify the scalar alpha. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [out] | y | device pointer storing vector y. |
| [in,out] | incy | [int] specifies the increment for the elements of y. |
| hipblasStatus_t hipblasZaxpyBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex *const | x[], | ||
| int | incx, | ||
| hipblasDoubleComplex *const | y[], | ||
| int | incy, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
axpyBatched compute y := alpha * x + y over a set of batched vectors.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x and y. |
| [in] | alpha | specifies the scalar alpha. |
| [in] | x | pointer storing vector x on the GPU. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [out] | y | pointer storing vector y on the GPU. |
| [in,out] | incy | [int] specifies the increment for the elements of y. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasZaxpyStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| hipblasDoubleComplex * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
axpyStridedBatched compute y := alpha * x + y over a set of strided batched vectors.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] |
| [in] | alpha | specifies the scalar alpha. |
| [in] | x | pointer storing vector x on the GPU. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | stridex | [hipblasStride] specifies the increment between vectors of x. |
| [out] | y | pointer storing vector y on the GPU. |
| [in,out] | incy | [int] specifies the increment for the elements of y. |
| [in] | stridey | [hipblasStride] specifies the increment between vectors of y. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasZcopy | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasDoubleComplex * | y, | ||
| int | incy | ||
| ) |
BLAS Level 1 API.
copy copies each element x[i] into y[i], for i = 1 , ... , n
y := x,
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x to be copied to y. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [out] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| hipblasStatus_t hipblasZcopyBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasDoubleComplex *const | x[], | ||
| int | incx, | ||
| hipblasDoubleComplex *const | y[], | ||
| int | incy, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
copyBatched copies each element x_i[j] into y_i[j], for j = 1 , ... , n; i = 1 , ... , batchCount
y_i := x_i,
where (x_i, y_i) is the i-th instance of the batch. x_i and y_i are vectors.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i to be copied to y_i. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each vector x_i. |
| [out] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each vector y_i. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasZcopyStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| hipblasDoubleComplex * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
copyStridedBatched copies each element x_i[j] into y_i[j], for j = 1 , ... , n; i = 1 , ... , batchCount
y_i := x_i,
where (x_i, y_i) is the i-th instance of the batch. x_i and y_i are vectors.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i to be copied to y_i. |
| [in] | x | device pointer to the first vector (x_1) in the batch. |
| [in] | incx | [int] specifies the increments for the elements of vectors x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stride_x, however the user should take care to ensure that stride_x is of appropriate size, for a typical case this means stride_x >= n * incx. |
| [out] | y | device pointer to the first vector (y_1) in the batch. |
| [in] | incy | [int] specifies the increment for the elements of vectors y_i. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1). There are no restrictions placed on stride_y, however the user should take care to ensure that stride_y is of appropriate size, for a typical case this means stride_y >= n * incy. stridey should be non zero. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasZdgmm | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| int | m, | ||
| int | n, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasDoubleComplex * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
dgmm performs one of the matrix-matrix operations
C = A * diag(x) if side == HIPBLAS_SIDE_RIGHT C = diag(x) * A if side == HIPBLAS_SIDE_LEFT
where C and A are m by n dimensional matrices. diag( x ) is a diagonal matrix and x is vector of dimension n if side == HIPBLAS_SIDE_RIGHT and dimension m if side == HIPBLAS_SIDE_LEFT.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] specifies the side of diag(x) |
| [in] | m | [int] matrix dimension m. |
| [in] | n | [int] matrix dimension n. |
| [in] | AP | device pointer storing matrix A. |
| [in] | lda | [int] specifies the leading dimension of A. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment between values of x |
| [in,out] | CP | device pointer storing matrix C. |
| [in] | ldc | [int] specifies the leading dimension of C. |
| hipblasStatus_t hipblasZdgmmBatched | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| int | m, | ||
| int | n, | ||
| const hipblasDoubleComplex *const | AP[], | ||
| int | lda, | ||
| const hipblasDoubleComplex *const | x[], | ||
| int | incx, | ||
| hipblasDoubleComplex *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
dgmmBatched performs one of the batched matrix-matrix operations
C_i = A_i * diag(x_i) for i = 0, 1, ... batchCount-1 if side == HIPBLAS_SIDE_RIGHT C_i = diag(x_i) * A_i for i = 0, 1, ... batchCount-1 if side == HIPBLAS_SIDE_LEFT
where C_i and A_i are m by n dimensional matrices. diag(x_i) is a diagonal matrix and x_i is vector of dimension n if side == HIPBLAS_SIDE_RIGHT and dimension m if side == HIPBLAS_SIDE_LEFT.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] specifies the side of diag(x) |
| [in] | m | [int] matrix dimension m. |
| [in] | n | [int] matrix dimension n. |
| [in] | AP | device array of device pointers storing each matrix A_i on the GPU. Each A_i is of dimension ( lda, n ) |
| [in] | lda | [int] specifies the leading dimension of A_i. |
| [in] | x | device array of device pointers storing each vector x_i on the GPU. Each x_i is of dimension n if side == HIPBLAS_SIDE_RIGHT and dimension m if side == HIPBLAS_SIDE_LEFT |
| [in] | incx | [int] specifies the increment between values of x_i |
| [in,out] | CP | device array of device pointers storing each matrix C_i on the GPU. Each C_i is of dimension ( ldc, n ). |
| [in] | ldc | [int] specifies the leading dimension of C_i. |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZdgmmStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| int | m, | ||
| int | n, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| hipblasDoubleComplex * | CP, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
dgmmStridedBatched performs one of the batched matrix-matrix operations
C_i = A_i * diag(x_i) if side == HIPBLAS_SIDE_RIGHT for i = 0, 1, ... batchCount-1 C_i = diag(x_i) * A_i if side == HIPBLAS_SIDE_LEFT for i = 0, 1, ... batchCount-1
where C_i and A_i are m by n dimensional matrices. diag(x_i) is a diagonal matrix and x_i is vector of dimension n if side == HIPBLAS_SIDE_RIGHT and dimension m if side == HIPBLAS_SIDE_LEFT.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] specifies the side of diag(x) |
| [in] | m | [int] matrix dimension m. |
| [in] | n | [int] matrix dimension n. |
| [in] | AP | device pointer to the first matrix A_0 on the GPU. Each A_i is of dimension ( lda, n ) |
| [in] | lda | [int] specifies the leading dimension of A. |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | x | pointer to the first vector x_0 on the GPU. Each x_i is of dimension n if side == HIPBLAS_SIDE_RIGHT and dimension m if side == HIPBLAS_SIDE_LEFT |
| [in] | incx | [int] specifies the increment between values of x |
| [in] | stridex | [hipblasStride] stride from the start of one vector(x_i) and the next one (x_i+1) |
| [in,out] | CP | device pointer to the first matrix C_0 on the GPU. Each C_i is of dimension ( ldc, n ). |
| [in] | ldc | [int] specifies the leading dimension of C. |
| [in] | strideC | [hipblasStride] stride from the start of one matrix (C_i) and the next one (C_i+1) |
| [in] | batchCount | [int] number of instances i in the batch. |
| hipblasStatus_t hipblasZdotc | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| const hipblasDoubleComplex * | y, | ||
| int | incy, | ||
| hipblasDoubleComplex * | result | ||
| ) |
BLAS Level 1 API.
dot(u) performs the dot product of vectors x and y
result = x * y;
dotc performs the dot product of the conjugate of complex vector x and complex vector y
result = conjugate (x) * y;
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x and y. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of y. |
| [in] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in,out] | result | device pointer or host pointer to store the dot product. return is 0.0 if n <= 0. |
| hipblasStatus_t hipblasZdotcBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasDoubleComplex *const | x[], | ||
| int | incx, | ||
| const hipblasDoubleComplex *const | y[], | ||
| int | incy, | ||
| int | batchCount, | ||
| hipblasDoubleComplex * | result | ||
| ) |
BLAS Level 1 API.
dotBatched(u) performs a batch of dot products of vectors x and y
result_i = x_i * y_i;
dotcBatched performs a batch of dot products of the conjugate of complex vector x and complex vector y
result_i = conjugate (x_i) * y_i;
where (x_i, y_i) is the i-th instance of the batch. x_i and y_i are vectors, for i = 1, ..., batchCount
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i and y_i. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | batchCount | [int] number of instances in the batch |
| [in,out] | result | device array or host array of batchCount size to store the dot products of each batch. return 0.0 for each element if n <= 0. |
| hipblasStatus_t hipblasZdotcStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const hipblasDoubleComplex * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount, | ||
| hipblasDoubleComplex * | result | ||
| ) |
BLAS Level 1 API.
dotStridedBatched(u) performs a batch of dot products of vectors x and y
result_i = x_i * y_i;
dotcStridedBatched performs a batch of dot products of the conjugate of complex vector x and complex vector y
result_i = conjugate (x_i) * y_i;
where (x_i, y_i) is the i-th instance of the batch. x_i and y_i are vectors, for i = 1, ..., batchCount
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i and y_i. |
| [in] | x | device pointer to the first vector (x_1) in the batch. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1) |
| [in] | y | device pointer to the first vector (y_1) in the batch. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1) |
| [in] | batchCount | [int] number of instances in the batch |
| [in,out] | result | device array or host array of batchCount size to store the dot products of each batch. return 0.0 for each element if n <= 0. |
| hipblasStatus_t hipblasZdotu | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| const hipblasDoubleComplex * | y, | ||
| int | incy, | ||
| hipblasDoubleComplex * | result | ||
| ) |
BLAS Level 1 API.
dot(u) performs the dot product of vectors x and y
result = x * y;
dotc performs the dot product of the conjugate of complex vector x and complex vector y
result = conjugate (x) * y;
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x and y. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of y. |
| [in] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in,out] | result | device pointer or host pointer to store the dot product. return is 0.0 if n <= 0. |
| hipblasStatus_t hipblasZdotuBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasDoubleComplex *const | x[], | ||
| int | incx, | ||
| const hipblasDoubleComplex *const | y[], | ||
| int | incy, | ||
| int | batchCount, | ||
| hipblasDoubleComplex * | result | ||
| ) |
BLAS Level 1 API.
dotBatched(u) performs a batch of dot products of vectors x and y
result_i = x_i * y_i;
dotcBatched performs a batch of dot products of the conjugate of complex vector x and complex vector y
result_i = conjugate (x_i) * y_i;
where (x_i, y_i) is the i-th instance of the batch. x_i and y_i are vectors, for i = 1, ..., batchCount
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i and y_i. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | batchCount | [int] number of instances in the batch |
| [in,out] | result | device array or host array of batchCount size to store the dot products of each batch. return 0.0 for each element if n <= 0. |
| hipblasStatus_t hipblasZdotuStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const hipblasDoubleComplex * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount, | ||
| hipblasDoubleComplex * | result | ||
| ) |
BLAS Level 1 API.
dotStridedBatched(u) performs a batch of dot products of vectors x and y
result_i = x_i * y_i;
dotcStridedBatched performs a batch of dot products of the conjugate of complex vector x and complex vector y
result_i = conjugate (x_i) * y_i;
where (x_i, y_i) is the i-th instance of the batch. x_i and y_i are vectors, for i = 1, ..., batchCount
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i and y_i. |
| [in] | x | device pointer to the first vector (x_1) in the batch. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1) |
| [in] | y | device pointer to the first vector (y_1) in the batch. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1) |
| [in] | batchCount | [int] number of instances in the batch |
| [in,out] | result | device array or host array of batchCount size to store the dot products of each batch. return 0.0 for each element if n <= 0. |
| hipblasStatus_t hipblasZdrot | ( | hipblasHandle_t | handle, |
| int | n, | ||
| hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasDoubleComplex * | y, | ||
| int | incy, | ||
| const double * | c, | ||
| const double * | s | ||
| ) |
BLAS Level 1 API.
rot applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to vectors x and y. Scalars c and s may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in the x and y vectors. |
| [in,out] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment between elements of x. |
| [in,out] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment between elements of y. |
| [in] | c | device pointer or host pointer storing scalar cosine component of the rotation matrix. |
| [in] | s | device pointer or host pointer storing scalar sine component of the rotation matrix. |
| hipblasStatus_t hipblasZdrotBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| hipblasDoubleComplex *const | x[], | ||
| int | incx, | ||
| hipblasDoubleComplex *const | y[], | ||
| int | incy, | ||
| const double * | c, | ||
| const double * | s, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
rotBatched applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to batched vectors x_i and y_i, for i = 1, ..., batchCount. Scalars c and s may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each x_i and y_i vectors. |
| [in,out] | x | device array of deivce pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment between elements of each x_i. |
| [in,out] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment between elements of each y_i. |
| [in] | c | device pointer or host pointer to scalar cosine component of the rotation matrix. |
| [in] | s | device pointer or host pointer to scalar sine component of the rotation matrix. |
| [in] | batchCount | [int] the number of x and y arrays, i.e. the number of batches. |
| hipblasStatus_t hipblasZdrotStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| hipblasDoubleComplex * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| const double * | c, | ||
| const double * | s, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
rotStridedBatched applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to strided batched vectors x_i and y_i, for i = 1, ..., batchCount. Scalars c and s may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each x_i and y_i vectors. |
| [in,out] | x | device pointer to the first vector x_1. |
| [in] | incx | [int] specifies the increment between elements of each x_i. |
| [in] | stridex | [hipblasStride] specifies the increment from the beginning of x_i to the beginning of x_(i+1) |
| [in,out] | y | device pointer to the first vector y_1. |
| [in] | incy | [int] specifies the increment between elements of each y_i. |
| [in] | stridey | [hipblasStride] specifies the increment from the beginning of y_i to the beginning of y_(i+1) |
| [in] | c | device pointer or host pointer to scalar cosine component of the rotation matrix. |
| [in] | s | device pointer or host pointer to scalar sine component of the rotation matrix. |
| [in] | batchCount | [int] the number of x and y arrays, i.e. the number of batches. |
| hipblasStatus_t hipblasZdscal | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const double * | alpha, | ||
| hipblasDoubleComplex * | x, | ||
| int | incx | ||
| ) |
BLAS Level 1 API.
scal scales each element of vector x with scalar alpha.
x := alpha * x
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x. |
| [in] | alpha | device pointer or host pointer for the scalar alpha. |
| [in,out] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| hipblasStatus_t hipblasZdscalBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const double * | alpha, | ||
| hipblasDoubleComplex *const | x[], | ||
| int | incx, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
scalBatched scales each element of vector x_i with scalar alpha, for i = 1, ... , batchCount.
x_i := alpha * x_i
where (x_i) is the i-th instance of the batch.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i. |
| [in] | alpha | host pointer or device pointer for the scalar alpha. |
| [in,out] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | batchCount | [int] specifies the number of batches in x. |
| hipblasStatus_t hipblasZdscalStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const double * | alpha, | ||
| hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
scalStridedBatched scales each element of vector x_i with scalar alpha, for i = 1, ... , batchCount.
x_i := alpha * x_i ,
where (x_i) is the i-th instance of the batch.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i. |
| [in] | alpha | host pointer or device pointer for the scalar alpha. |
| [in,out] | x | device pointer to the first vector (x_1) in the batch. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stride_x, however the user should take care to ensure that stride_x is of appropriate size, for a typical case this means stride_x >= n * incx. |
| [in] | batchCount | [int] specifies the number of batches in x. |
| hipblasStatus_t hipblasZgbmv | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | trans, | ||
| int | m, | ||
| int | n, | ||
| int | kl, | ||
| int | ku, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| const hipblasDoubleComplex * | beta, | ||
| hipblasDoubleComplex * | y, | ||
| int | incy | ||
| ) |
BLAS Level 2 API.
gbmv performs one of the matrix-vector operations
y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y, or y := alpha*A**H*x + beta*y,
where alpha and beta are scalars, x and y are vectors and A is an m by n banded matrix with kl sub-diagonals and ku super-diagonals.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | trans | [hipblasOperation_t] indicates whether matrix A is tranposed (conjugated) or not |
| [in] | m | [int] number of rows of matrix A |
| [in] | n | [int] number of columns of matrix A |
| [in] | kl | [int] number of sub-diagonals of A |
| [in] | ku | [int] number of super-diagonals of A |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device pointer storing banded matrix A. Leading (kl + ku + 1) by n part of the matrix contains the coefficients of the banded matrix. The leading diagonal resides in row (ku + 1) with the first super-diagonal above on the RHS of row ku. The first sub-diagonal resides below on the LHS of row ku + 2. This propogates up and down across sub/super-diagonals. Ex: (m = n = 7; ku = 2, kl = 2) 1 2 3 0 0 0 0 0 0 3 3 3 3 3 4 1 2 3 0 0 0 0 2 2 2 2 2 2 5 4 1 2 3 0 0 -—> 1 1 1 1 1 1 1 0 5 4 1 2 3 0 4 4 4 4 4 4 0 0 0 5 4 1 2 0 5 5 5 5 5 0 0 0 0 0 5 4 1 2 0 0 0 0 0 0 0 0 0 0 0 5 4 1 0 0 0 0 0 0 0 Note that the empty elements which don't correspond to data will not be referenced. |
| [in] | lda | [int] specifies the leading dimension of A. Must be >= (kl + ku + 1) |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| hipblasStatus_t hipblasZgbmvBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | trans, | ||
| int | m, | ||
| int | n, | ||
| int | kl, | ||
| int | ku, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex *const | AP[], | ||
| int | lda, | ||
| const hipblasDoubleComplex *const | x[], | ||
| int | incx, | ||
| const hipblasDoubleComplex * | beta, | ||
| hipblasDoubleComplex *const | y[], | ||
| int | incy, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
gbmvBatched performs one of the matrix-vector operations
y_i := alpha*A_i*x_i + beta*y_i, or y_i := alpha*A_i**T*x_i + beta*y_i, or y_i := alpha*A_i**H*x_i + beta*y_i,
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha and beta are scalars, x_i and y_i are vectors and A_i is an m by n banded matrix with kl sub-diagonals and ku super-diagonals, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | trans | [hipblasOperation_t] indicates whether matrix A is tranposed (conjugated) or not |
| [in] | m | [int] number of rows of each matrix A_i |
| [in] | n | [int] number of columns of each matrix A_i |
| [in] | kl | [int] number of sub-diagonals of each A_i |
| [in] | ku | [int] number of super-diagonals of each A_i |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device array of device pointers storing each banded matrix A_i. Leading (kl + ku + 1) by n part of the matrix contains the coefficients of the banded matrix. The leading diagonal resides in row (ku + 1) with the first super-diagonal above on the RHS of row ku. The first sub-diagonal resides below on the LHS of row ku + 2. This propogates up and down across sub/super-diagonals. Ex: (m = n = 7; ku = 2, kl = 2) 1 2 3 0 0 0 0 0 0 3 3 3 3 3 4 1 2 3 0 0 0 0 2 2 2 2 2 2 5 4 1 2 3 0 0 -—> 1 1 1 1 1 1 1 0 5 4 1 2 3 0 4 4 4 4 4 4 0 0 0 5 4 1 2 0 5 5 5 5 5 0 0 0 0 0 5 4 1 2 0 0 0 0 0 0 0 0 0 0 0 5 4 1 0 0 0 0 0 0 0 Note that the empty elements which don't correspond to data will not be referenced. |
| [in] | lda | [int] specifies the leading dimension of each A_i. Must be >= (kl + ku + 1) |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | batchCount | [int] specifies the number of instances in the batch. |
| hipblasStatus_t hipblasZgbmvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | trans, | ||
| int | m, | ||
| int | n, | ||
| int | kl, | ||
| int | ku, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const hipblasDoubleComplex * | beta, | ||
| hipblasDoubleComplex * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
gbmvStridedBatched performs one of the matrix-vector operations
y_i := alpha*A_i*x_i + beta*y_i, or y_i := alpha*A_i**T*x_i + beta*y_i, or y_i := alpha*A_i**H*x_i + beta*y_i,
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha and beta are scalars, x_i and y_i are vectors and A_i is an m by n banded matrix with kl sub-diagonals and ku super-diagonals, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | trans | [hipblasOperation_t] indicates whether matrix A is tranposed (conjugated) or not |
| [in] | m | [int] number of rows of matrix A |
| [in] | n | [int] number of columns of matrix A |
| [in] | kl | [int] number of sub-diagonals of A |
| [in] | ku | [int] number of super-diagonals of A |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device pointer to first banded matrix (A_1). Leading (kl + ku + 1) by n part of the matrix contains the coefficients of the banded matrix. The leading diagonal resides in row (ku + 1) with the first super-diagonal above on the RHS of row ku. The first sub-diagonal resides below on the LHS of row ku + 2. This propogates up and down across sub/super-diagonals. Ex: (m = n = 7; ku = 2, kl = 2) 1 2 3 0 0 0 0 0 0 3 3 3 3 3 4 1 2 3 0 0 0 0 2 2 2 2 2 2 5 4 1 2 3 0 0 -—> 1 1 1 1 1 1 1 0 5 4 1 2 3 0 4 4 4 4 4 4 0 0 0 5 4 1 2 0 5 5 5 5 5 0 0 0 0 0 5 4 1 2 0 0 0 0 0 0 0 0 0 0 0 5 4 1 0 0 0 0 0 0 0 Note that the empty elements which don't correspond to data will not be referenced. |
| [in] | lda | [int] specifies the leading dimension of A. Must be >= (kl + ku + 1) |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | x | device pointer to first vector (x_1). |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1) |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device pointer to first vector (y_1). |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (x_i+1) |
| [in] | batchCount | [int] specifies the number of instances in the batch. |
| hipblasStatus_t hipblasZgeam | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | transA, | ||
| hipblasOperation_t | transB, | ||
| int | m, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| const hipblasDoubleComplex * | beta, | ||
| const hipblasDoubleComplex * | BP, | ||
| int | ldb, | ||
| hipblasDoubleComplex * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
geam performs one of the matrix-matrix operations
C = alpha*op( A ) + beta*op( B ),
where op( X ) is one of
op( X ) = X or op( X ) = X**T or op( X ) = X**H,
alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by n matrix, op( B ) an m by n matrix, and C an m by n matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | transA | [hipblasOperation_t] specifies the form of op( A ) |
| [in] | transB | [hipblasOperation_t] specifies the form of op( B ) |
| [in] | m | [int] matrix dimension m. |
| [in] | n | [int] matrix dimension n. |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. |
| [in] | AP | device pointer storing matrix A. |
| [in] | lda | [int] specifies the leading dimension of A. |
| [in] | beta | device pointer or host pointer specifying the scalar beta. |
| [in] | BP | device pointer storing matrix B. |
| [in] | ldb | [int] specifies the leading dimension of B. |
| [in,out] | CP | device pointer storing matrix C. |
| [in] | ldc | [int] specifies the leading dimension of C. |
| hipblasStatus_t hipblasZgeamBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | transA, | ||
| hipblasOperation_t | transB, | ||
| int | m, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex *const | AP[], | ||
| int | lda, | ||
| const hipblasDoubleComplex * | beta, | ||
| const hipblasDoubleComplex *const | BP[], | ||
| int | ldb, | ||
| hipblasDoubleComplex *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
geamBatched performs one of the batched matrix-matrix operations
C_i = alpha*op( A_i ) + beta*op( B_i ) for i = 0, 1, ... batchCount - 1
where alpha and beta are scalars, and op(A_i), op(B_i) and C_i are m by n matrices and op( X ) is one of
op( X ) = X or op( X ) = X**T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | transA | [hipblasOperation_t] specifies the form of op( A ) |
| [in] | transB | [hipblasOperation_t] specifies the form of op( B ) |
| [in] | m | [int] matrix dimension m. |
| [in] | n | [int] matrix dimension n. |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. |
| [in] | AP | device array of device pointers storing each matrix A_i on the GPU. Each A_i is of dimension ( lda, k ), where k is m when transA == HIPBLAS_OP_N and is n when transA == HIPBLAS_OP_T. |
| [in] | lda | [int] specifies the leading dimension of A. |
| [in] | beta | device pointer or host pointer specifying the scalar beta. |
| [in] | BP | device array of device pointers storing each matrix B_i on the GPU. Each B_i is of dimension ( ldb, k ), where k is m when transB == HIPBLAS_OP_N and is n when transB == HIPBLAS_OP_T. |
| [in] | ldb | [int] specifies the leading dimension of B. |
| [in,out] | CP | device array of device pointers storing each matrix C_i on the GPU. Each C_i is of dimension ( ldc, n ). |
| [in] | ldc | [int] specifies the leading dimension of C. |
| [in] | batchCount | [int] number of instances i in the batch. |
| hipblasStatus_t hipblasZgeamStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | transA, | ||
| hipblasOperation_t | transB, | ||
| int | m, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const hipblasDoubleComplex * | beta, | ||
| const hipblasDoubleComplex * | BP, | ||
| int | ldb, | ||
| hipblasStride | strideB, | ||
| hipblasDoubleComplex * | CP, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
geamStridedBatched performs one of the batched matrix-matrix operations
C_i = alpha*op( A_i ) + beta*op( B_i ) for i = 0, 1, ... batchCount - 1
where alpha and beta are scalars, and op(A_i), op(B_i) and C_i are m by n matrices and op( X ) is one of
op( X ) = X or op( X ) = X**T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | transA | [hipblasOperation_t] specifies the form of op( A ) |
| [in] | transB | [hipblasOperation_t] specifies the form of op( B ) |
| [in] | m | [int] matrix dimension m. |
| [in] | n | [int] matrix dimension n. |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. |
| [in] | AP | device pointer to the first matrix A_0 on the GPU. Each A_i is of dimension ( lda, k ), where k is m when transA == HIPBLAS_OP_N and is n when transA == HIPBLAS_OP_T. |
| [in] | lda | [int] specifies the leading dimension of A. |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | beta | device pointer or host pointer specifying the scalar beta. |
| [in] | BP | pointer to the first matrix B_0 on the GPU. Each B_i is of dimension ( ldb, k ), where k is m when transB == HIPBLAS_OP_N and is n when transB == HIPBLAS_OP_T. |
| [in] | ldb | [int] specifies the leading dimension of B. |
| [in] | strideB | [hipblasStride] stride from the start of one matrix (B_i) and the next one (B_i+1) |
| [in,out] | CP | pointer to the first matrix C_0 on the GPU. Each C_i is of dimension ( ldc, n ). |
| [in] | ldc | [int] specifies the leading dimension of C. |
| [in] | strideC | [hipblasStride] stride from the start of one matrix (C_i) and the next one (C_i+1) |
| [in] | batchCount | [int] number of instances i in the batch. |
| hipblasStatus_t hipblasZgels | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | trans, | ||
| const int | m, | ||
| const int | n, | ||
| const int | nrhs, | ||
| hipblasDoubleComplex * | A, | ||
| const int | lda, | ||
| hipblasDoubleComplex * | B, | ||
| const int | ldb, | ||
| int * | info, | ||
| int * | deviceInfo | ||
| ) |
GELS solves an overdetermined (or underdetermined) linear system defined by an m-by-n matrix A, and a corresponding matrix B, using the QR factorization computed by GEQRF (or the LQ factorization computed by "GELQF").
Depending on the value of trans, the problem solved by this function is either of the form
\[ \begin{array}{cl} A X = B & \: \text{not transposed, or}\\ A' X = B & \: \text{transposed if real, or conjugate transposed if complex} \end{array} \]
If m >= n (or m < n in the case of transpose/conjugate transpose), the system is overdetermined and a least-squares solution approximating X is found by minimizing
\[ || B - A X || \quad \text{(or} \: || B - A' X ||\text{)} \]
If m < n (or m >= n in the case of transpose/conjugate transpose), the system is underdetermined and a unique solution for X is chosen such that \(|| X ||\) is minimal.
| [in] | handle | hipblasHandle_t. |
| [in] | trans | hipblasOperation_t. Specifies the form of the system of equations. |
| [in] | m | int. m >= 0. The number of rows of matrix A. |
| [in] | n | int. n >= 0. The number of columns of matrix A. |
| [in] | nrhs | int. nrhs >= 0. The number of columns of matrices B and X; i.e., the columns on the right hand side. |
| [in,out] | A | pointer to type. Array on the GPU of dimension lda*n. On entry, the matrix A. On exit, the QR (or LQ) factorization of A as returned by "GEQRF" (or "GELQF"). |
| [in] | lda | int. lda >= m. Specifies the leading dimension of matrix A. |
| [in,out] | B | pointer to type. Array on the GPU of dimension ldb*nrhs. On entry, the matrix B. On exit, when info = 0, B is overwritten by the solution vectors (and the residuals in the overdetermined cases) stored as columns. |
| [in] | ldb | int. ldb >= max(m,n). Specifies the leading dimension of matrix B. |
| [out] | info | pointer to an int on the host. If info = 0, successful exit. If info = j < 0, the argument at position -j is invalid. |
| [out] | deviceInfo | pointer to int on the GPU. If info = 0, successful exit. If info = i > 0, the solution could not be computed because input matrix A is rank deficient; the i-th diagonal element of its triangular factor is zero. |
| hipblasStatus_t hipblasZgelsBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | trans, | ||
| const int | m, | ||
| const int | n, | ||
| const int | nrhs, | ||
| hipblasDoubleComplex *const | A[], | ||
| const int | lda, | ||
| hipblasDoubleComplex *const | B[], | ||
| const int | ldb, | ||
| int * | info, | ||
| int * | deviceInfo, | ||
| const int | batchCount | ||
| ) |
gelsBatched solves a batch of overdetermined (or underdetermined) linear systems defined by a set of m-by-n matrices \(A_j\), and corresponding matrices \(B_j\), using the QR factorizations computed by "GEQRF_BATCHED" (or the LQ factorizations computed by "GELQF_BATCHED").
For each instance in the batch, depending on the value of trans, the problem solved by this function is either of the form
\[ \begin{array}{cl} A_j X_j = B_j & \: \text{not transposed, or}\\ A_j' X_j = B_j & \: \text{transposed if real, or conjugate transposed if complex} \end{array} \]
If m >= n (or m < n in the case of transpose/conjugate transpose), the system is overdetermined and a least-squares solution approximating X_j is found by minimizing
\[ || B_j - A_j X_j || \quad \text{(or} \: || B_j - A_j' X_j ||\text{)} \]
If m < n (or m >= n in the case of transpose/conjugate transpose), the system is underdetermined and a unique solution for X_j is chosen such that \(|| X_j ||\) is minimal.
| [in] | handle | hipblasHandle_t. |
| [in] | trans | hipblasOperation_t. Specifies the form of the system of equations. |
| [in] | m | int. m >= 0. The number of rows of all matrices A_j in the batch. |
| [in] | n | int. n >= 0. The number of columns of all matrices A_j in the batch. |
| [in] | nrhs | int. nrhs >= 0. The number of columns of all matrices B_j and X_j in the batch; i.e., the columns on the right hand side. |
| [in,out] | A | array of pointer to type. Each pointer points to an array on the GPU of dimension lda*n. On entry, the matrices A_j. On exit, the QR (or LQ) factorizations of A_j as returned by "GEQRF_BATCHED" (or "GELQF_BATCHED"). |
| [in] | lda | int. lda >= m. Specifies the leading dimension of matrices A_j. |
| [in,out] | B | array of pointer to type. Each pointer points to an array on the GPU of dimension ldb*nrhs. On entry, the matrices B_j. On exit, when info[j] = 0, B_j is overwritten by the solution vectors (and the residuals in the overdetermined cases) stored as columns. |
| [in] | ldb | int. ldb >= max(m,n). Specifies the leading dimension of matrices B_j. |
| [out] | info | pointer to an int on the host. If info = 0, successful exit. If info = j < 0, the argument at position -j is invalid. |
| [out] | deviceInfo | pointer to int. Array of batchCount integers on the GPU. If deviceInfo[j] = 0, successful exit for solution of A_j. If deviceInfo[j] = i > 0, the solution of A_j could not be computed because input matrix A_j is rank deficient; the i-th diagonal element of its triangular factor is zero. |
| [in] | batchCount | int. batchCount >= 0. Number of matrices in the batch. |
| hipblasStatus_t hipblasZgelsStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | trans, | ||
| const int | m, | ||
| const int | n, | ||
| const int | nrhs, | ||
| hipblasDoubleComplex * | A, | ||
| const int | lda, | ||
| const hipblasStride | strideA, | ||
| hipblasDoubleComplex * | B, | ||
| const int | ldb, | ||
| const hipblasStride | strideB, | ||
| int * | info, | ||
| int * | deviceInfo, | ||
| const int | batch_count | ||
| ) |
gelsStridedBatched solves a batch of overdetermined (or underdetermined) linear systems defined by a set of m-by-n matrices \(A_j\), and corresponding matrices \(B_j\), using the QR factorizations computed by "GEQRF_STRIDED_BATCHED" (or the LQ factorizations computed by "GELQF_STRIDED_BATCHED").
For each instance in the batch, depending on the value of trans, the problem solved by this function is either of the form
\[ \begin{array}{cl} A_j X_j = B_j & \: \text{not transposed, or}\\ A_j' X_j = B_j & \: \text{transposed if real, or conjugate transposed if complex} \end{array} \]
If m >= n (or m < n in the case of transpose/conjugate transpose), the system is overdetermined and a least-squares solution approximating X_j is found by minimizing
\[ || B_j - A_j X_j || \quad \text{(or} \: || B_j - A_j' X_j ||\text{)} \]
If m < n (or m >= n in the case of transpose/conjugate transpose), the system is underdetermined and a unique solution for X_j is chosen such that \(|| X_j ||\) is minimal.
| [in] | handle | hipblasHandle_t. |
| [in] | trans | hipblasOperation_t. Specifies the form of the system of equations. |
| [in] | m | int. m >= 0. The number of rows of all matrices A_j in the batch. |
| [in] | n | int. n >= 0. The number of columns of all matrices A_j in the batch. |
| [in] | nrhs | int. nrhs >= 0. The number of columns of all matrices B_j and X_j in the batch; i.e., the columns on the right hand side. |
| [in,out] | A | pointer to type. Array on the GPU (the size depends on the value of strideA). On entry, the matrices A_j. On exit, the QR (or LQ) factorizations of A_j as returned by "GEQRF_STRIDED_BATCHED" (or "GELQF_STRIDED_BATCHED"). |
| [in] | lda | int. lda >= m. Specifies the leading dimension of matrices A_j. |
| [in] | strideA | hipblasStride. Stride from the start of one matrix A_j to the next one A_(j+1). There is no restriction for the value of strideA. Normal use case is strideA >= lda*n |
| [in,out] | B | pointer to type. Array on the GPU (the size depends on the value of strideB). On entry, the matrices B_j. On exit, when info[j] = 0, each B_j is overwritten by the solution vectors (and the residuals in the overdetermined cases) stored as columns. |
| [in] | ldb | int. ldb >= max(m,n). Specifies the leading dimension of matrices B_j. |
| [in] | strideB | hipblasStride. Stride from the start of one matrix B_j to the next one B_(j+1). There is no restriction for the value of strideB. Normal use case is strideB >= ldb*nrhs |
| [out] | info | pointer to an int on the host. If info = 0, successful exit. If info = j < 0, the argument at position -j is invalid. |
| [out] | deviceInfo | pointer to int. Array of batchCount integers on the GPU. If deviceInfo[j] = 0, successful exit for solution of A_j. If deviceInfo[j] = i > 0, the solution of A_j could not be computed because input matrix A_j is rank deficient; the i-th diagonal element of its triangular factor is zero. |
| [in] | batchCount | int. batchCount >= 0. Number of matrices in the batch. |
| hipblasStatus_t hipblasZgemm | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | transA, | ||
| hipblasOperation_t | transB, | ||
| int | m, | ||
| int | n, | ||
| int | k, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| const hipblasDoubleComplex * | BP, | ||
| int | ldb, | ||
| const hipblasDoubleComplex * | beta, | ||
| hipblasDoubleComplex * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
gemm performs one of the matrix-matrix operations
C = alpha*op( A )*op( B ) + beta*C,
where op( X ) is one of
op( X ) = X or op( X ) = X**T or op( X ) = X**H,
alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
| [in] | handle | [hipblasHandle_t] . |
| [in] | transA | [hipblasOperation_t] specifies the form of op( A ) |
| [in] | transB | [hipblasOperation_t] specifies the form of op( B ) |
| [in] | m | [int] number or rows of matrices op( A ) and C |
| [in] | n | [int] number of columns of matrices op( B ) and C |
| [in] | k | [int] number of columns of matrix op( A ) and number of rows of matrix op( B ) |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. |
| [in] | AP | device pointer storing matrix A. |
| [in] | lda | [int] specifies the leading dimension of A. |
| [in] | BP | device pointer storing matrix B. |
| [in] | ldb | [int] specifies the leading dimension of B. |
| [in] | beta | device pointer or host pointer specifying the scalar beta. |
| [in,out] | CP | device pointer storing matrix C on the GPU. |
| [in] | ldc | [int] specifies the leading dimension of C. |
| hipblasStatus_t hipblasZgemmBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | transA, | ||
| hipblasOperation_t | transB, | ||
| int | m, | ||
| int | n, | ||
| int | k, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex *const | AP[], | ||
| int | lda, | ||
| const hipblasDoubleComplex *const | BP[], | ||
| int | ldb, | ||
| const hipblasDoubleComplex * | beta, | ||
| hipblasDoubleComplex *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
gemmBatched performs one of the batched matrix-matrix operations C_i = alpha*op( A_i )*op( B_i ) + beta*C_i, for i = 1, ..., batchCount. where op( X ) is one of op( X ) = X or op( X ) = X**T or op( X ) = X**H, alpha and beta are scalars, and A, B and C are strided batched matrices, with op( A ) an m by k by batchCount strided_batched matrix, op( B ) an k by n by batchCount strided_batched matrix and C an m by n by batchCount strided_batched matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | transA | [hipblasOperation_t] specifies the form of op( A ) |
| [in] | transB | [hipblasOperation_t] specifies the form of op( B ) |
| [in] | m | [int] matrix dimention m. |
| [in] | n | [int] matrix dimention n. |
| [in] | k | [int] matrix dimention k. |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. |
| [in] | AP | device array of device pointers storing each matrix A_i. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | BP | device array of device pointers storing each matrix B_i. |
| [in] | ldb | [int] specifies the leading dimension of each B_i. |
| [in] | beta | device pointer or host pointer specifying the scalar beta. |
| [in,out] | CP | device array of device pointers storing each matrix C_i. |
| [in] | ldc | [int] specifies the leading dimension of each C_i. |
| [in] | batchCount | [int] number of gemm operations in the batch |
| hipblasStatus_t hipblasZgemmStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | transA, | ||
| hipblasOperation_t | transB, | ||
| int | m, | ||
| int | n, | ||
| int | k, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| long long | strideA, | ||
| const hipblasDoubleComplex * | BP, | ||
| int | ldb, | ||
| long long | strideB, | ||
| const hipblasDoubleComplex * | beta, | ||
| hipblasDoubleComplex * | CP, | ||
| int | ldc, | ||
| long long | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
gemmStridedBatched performs one of the strided batched matrix-matrix operations
C_i = alpha*op( A_i )*op( B_i ) + beta*C_i, for i = 1, ..., batchCount.
where op( X ) is one of
op( X ) = X or op( X ) = X**T or op( X ) = X**H,
alpha and beta are scalars, and A, B and C are strided batched matrices, with op( A ) an m by k by batchCount strided_batched matrix, op( B ) an k by n by batchCount strided_batched matrix and C an m by n by batchCount strided_batched matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | transA | [hipblasOperation_t] specifies the form of op( A ) |
| [in] | transB | [hipblasOperation_t] specifies the form of op( B ) |
| [in] | m | [int] matrix dimention m. |
| [in] | n | [int] matrix dimention n. |
| [in] | k | [int] matrix dimention k. |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. |
| [in] | AP | device pointer pointing to the first matrix A_1. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | strideA | [hipblasStride] stride from the start of one A_i matrix to the next A_(i + 1). |
| [in] | BP | device pointer pointing to the first matrix B_1. |
| [in] | ldb | [int] specifies the leading dimension of each B_i. |
| [in] | strideB | [hipblasStride] stride from the start of one B_i matrix to the next B_(i + 1). |
| [in] | beta | device pointer or host pointer specifying the scalar beta. |
| [in,out] | CP | device pointer pointing to the first matrix C_1. |
| [in] | ldc | [int] specifies the leading dimension of each C_i. |
| [in] | strideC | [hipblasStride] stride from the start of one C_i matrix to the next C_(i + 1). |
| [in] | batchCount | [int] number of gemm operatons in the batch |
| hipblasStatus_t hipblasZgemv | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | trans, | ||
| int | m, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| const hipblasDoubleComplex * | beta, | ||
| hipblasDoubleComplex * | y, | ||
| int | incy | ||
| ) |
BLAS Level 2 API.
gemv performs one of the matrix-vector operations
y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y, or y := alpha*A**H*x + beta*y,
where alpha and beta are scalars, x and y are vectors and A is an m by n matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | trans | [hipblasOperation_t] indicates whether matrix A is tranposed (conjugated) or not |
| [in] | m | [int] number of rows of matrix A |
| [in] | n | [int] number of columns of matrix A |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device pointer storing matrix A. |
| [in] | lda | [int] specifies the leading dimension of A. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| hipblasStatus_t hipblasZgemvBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | trans, | ||
| int | m, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex *const | AP[], | ||
| int | lda, | ||
| const hipblasDoubleComplex *const | x[], | ||
| int | incx, | ||
| const hipblasDoubleComplex * | beta, | ||
| hipblasDoubleComplex *const | y[], | ||
| int | incy, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
gemvBatched performs a batch of matrix-vector operations
y_i := alpha*A_i*x_i + beta*y_i, or y_i := alpha*A_i**T*x_i + beta*y_i, or y_i := alpha*A_i**H*x_i + beta*y_i,
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha and beta are scalars, x_i and y_i are vectors and A_i is an m by n matrix, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | trans | [hipblasOperation_t] indicates whether matrices A_i are tranposed (conjugated) or not |
| [in] | m | [int] number of rows of each matrix A_i |
| [in] | n | [int] number of columns of each matrix A_i |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device array of device pointers storing each matrix A_i. |
| [in] | lda | [int] specifies the leading dimension of each matrix A_i. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each vector x_i. |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each vector y_i. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasZgemvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasOperation_t | transA, | ||
| int | m, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const hipblasDoubleComplex * | beta, | ||
| hipblasDoubleComplex * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
gemvStridedBatched performs a batch of matrix-vector operations
y_i := alpha*A_i*x_i + beta*y_i, or y_i := alpha*A_i**T*x_i + beta*y_i, or y_i := alpha*A_i**H*x_i + beta*y_i,
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha and beta are scalars, x_i and y_i are vectors and A_i is an m by n matrix, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | transA | [hipblasOperation_t] indicates whether matrices A_i are tranposed (conjugated) or not |
| [in] | m | [int] number of rows of matrices A_i |
| [in] | n | [int] number of columns of matrices A_i |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device pointer to the first matrix (A_1) in the batch. |
| [in] | lda | [int] specifies the leading dimension of matrices A_i. |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | x | device pointer to the first vector (x_1) in the batch. |
| [in] | incx | [int] specifies the increment for the elements of vectors x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stridex, however the user should take care to ensure that stridex is of appropriate size. When trans equals HIPBLAS_OP_N this typically means stridex >= n * incx, otherwise stridex >= m * incx. |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device pointer to the first vector (y_1) in the batch. |
| [in] | incy | [int] specifies the increment for the elements of vectors y_i. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1). There are no restrictions placed on stridey, however the user should take care to ensure that stridey is of appropriate size. When trans equals HIPBLAS_OP_N this typically means stridey >= m * incy, otherwise stridey >= n * incy. stridey should be non zero. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasZgeqrf | ( | hipblasHandle_t | handle, |
| const int | m, | ||
| const int | n, | ||
| hipblasDoubleComplex * | A, | ||
| const int | lda, | ||
| hipblasDoubleComplex * | ipiv, | ||
| int * | info | ||
| ) |
SOLVER API.
geqrf computes a QR factorization of a general m-by-n matrix A.
The factorization has the form
\[ A = Q\left[\begin{array}{c} R\\ 0 \end{array}\right] \]
where R is upper triangular (upper trapezoidal if m < n), and Q is a m-by-m orthogonal/unitary matrix represented as the product of Householder matrices
\[ Q = H_1H_2\cdots H_k, \quad \text{with} \: k = \text{min}(m,n) \]
Each Householder matrix \(H_i\) is given by
\[ H_i = I - \text{ipiv}[i] \cdot v_i v_i' \]
where the first i-1 elements of the Householder vector \(v_i\) are zero, and \(v_i[i] = 1\).
| [in] | handle | hipblasHandle_t. |
| [in] | m | int. m >= 0. The number of rows of the matrix A. |
| [in] | n | int. n >= 0. The number of columns of the matrix A. |
| [in,out] | A | pointer to type. Array on the GPU of dimension lda*n. On entry, the m-by-n matrix to be factored. On exit, the elements on and above the diagonal contain the factor R; the elements below the diagonal are the last m - i elements of Householder vector v_i. |
| [in] | lda | int. lda >= m. Specifies the leading dimension of A. |
| [out] | ipiv | pointer to type. Array on the GPU of dimension min(m,n). The Householder scalars. |
| [out] | info | pointer to a int on the host. If info = 0, successful exit. If info = j < 0, the argument at position -j is invalid. |
| hipblasStatus_t hipblasZgeqrfBatched | ( | hipblasHandle_t | handle, |
| const int | m, | ||
| const int | n, | ||
| hipblasDoubleComplex *const | A[], | ||
| const int | lda, | ||
| hipblasDoubleComplex *const | ipiv[], | ||
| int * | info, | ||
| const int | batchCount | ||
| ) |
SOLVER API.
geqrfBatched computes the QR factorization of a batch of general m-by-n matrices.
The factorization of matrix \(A_i\) in the batch has the form
\[ A_i = Q_i\left[\begin{array}{c} R_i\\ 0 \end{array}\right] \]
where \(R_i\) is upper triangular (upper trapezoidal if m < n), and \(Q_i\) is a m-by-m orthogonal/unitary matrix represented as the product of Householder matrices
\[ Q_i = H_{i_1}H_{i_2}\cdots H_{i_k}, \quad \text{with} \: k = \text{min}(m,n) \]
Each Householder matrix \(H_{i_j}\) is given by
\[ H_{i_j} = I - \text{ipiv}_i[j] \cdot v_{i_j} v_{i_j}' \]
where the first j-1 elements of Householder vector \(v_{i_j}\) are zero, and \(v_{i_j}[j] = 1\).
| [in] | handle | hipblasHandle_t. |
| [in] | m | int. m >= 0. The number of rows of all the matrices A_i in the batch. |
| [in] | n | int. n >= 0. The number of columns of all the matrices A_i in the batch. |
| [in,out] | A | Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n. On entry, the m-by-n matrices A_i to be factored. On exit, the elements on and above the diagonal contain the factor R_i. The elements below the diagonal are the last m - j elements of Householder vector v_(i_j). |
| [in] | lda | int. lda >= m. Specifies the leading dimension of matrices A_i. |
| [out] | ipiv | array of pointers to type. Each pointer points to an array on the GPU of dimension min(m, n). Contains the vectors ipiv_i of corresponding Householder scalars. |
| [out] | info | pointer to a int on the host. If info = 0, successful exit. If info = j < 0, the argument at position -j is invalid. |
| [in] | batchCount | int. batchCount >= 0. Number of matrices in the batch. |
| hipblasStatus_t hipblasZgeqrfStridedBatched | ( | hipblasHandle_t | handle, |
| const int | m, | ||
| const int | n, | ||
| hipblasDoubleComplex * | A, | ||
| const int | lda, | ||
| const hipblasStride | strideA, | ||
| hipblasDoubleComplex * | ipiv, | ||
| const hipblasStride | strideP, | ||
| int * | info, | ||
| const int | batchCount | ||
| ) |
SOLVER API.
geqrfStridedBatched computes the QR factorization of a batch of general m-by-n matrices.
The factorization of matrix \(A_i\) in the batch has the form
\[ A_i = Q_i\left[\begin{array}{c} R_i\\ 0 \end{array}\right] \]
where \(R_i\) is upper triangular (upper trapezoidal if m < n), and \(Q_i\) is a m-by-m orthogonal/unitary matrix represented as the product of Householder matrices
\[ Q_i = H_{i_1}H_{i_2}\cdots H_{i_k}, \quad \text{with} \: k = \text{min}(m,n) \]
Each Householder matrix \(H_{i_j}\) is given by
\[ H_{i_j} = I - \text{ipiv}_j[j] \cdot v_{i_j} v_{i_j}' \]
where the first j-1 elements of Householder vector \(v_{i_j}\) are zero, and \(v_{i_j}[j] = 1\).
| [in] | handle | hipblasHandle_t. |
| [in] | m | int. m >= 0. The number of rows of all the matrices A_i in the batch. |
| [in] | n | int. n >= 0. The number of columns of all the matrices A_i in the batch. |
| [in,out] | A | pointer to type. Array on the GPU (the size depends on the value of strideA). On entry, the m-by-n matrices A_i to be factored. On exit, the elements on and above the diagonal contain the factor R_i. The elements below the diagonal are the last m - j elements of Householder vector v_(i_j). |
| [in] | lda | int. lda >= m. Specifies the leading dimension of matrices A_i. |
| [in] | strideA | hipblasStride. Stride from the start of one matrix A_i to the next one A_(i+1). There is no restriction for the value of strideA. Normal use case is strideA >= lda*n. |
| [out] | ipiv | pointer to type. Array on the GPU (the size depends on the value of strideP). Contains the vectors ipiv_i of corresponding Householder scalars. |
| [in] | strideP | hipblasStride. Stride from the start of one vector ipiv_i to the next one ipiv_(i+1). There is no restriction for the value of strideP. Normal use is strideP >= min(m,n). |
| [out] | info | pointer to a int on the host. If info = 0, successful exit. If info = j < 0, the argument at position -j is invalid. |
| [in] | batchCount | int. batchCount >= 0. Number of matrices in the batch. |
| hipblasStatus_t hipblasZgerc | ( | hipblasHandle_t | handle, |
| int | m, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| const hipblasDoubleComplex * | y, | ||
| int | incy, | ||
| hipblasDoubleComplex * | AP, | ||
| int | lda | ||
| ) |
BLAS Level 2 API.
ger,geru,gerc performs the matrix-vector operations
A := A + alpha*x*y**T , OR A := A + alpha*x*y**H for gerc
where alpha is a scalar, x and y are vectors, and A is an m by n matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | m | [int] the number of rows of the matrix A. |
| [in] | n | [int] the number of columns of the matrix A. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in,out] | AP | device pointer storing matrix A. |
| [in] | lda | [int] specifies the leading dimension of A. |
| hipblasStatus_t hipblasZgercBatched | ( | hipblasHandle_t | handle, |
| int | m, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex *const | x[], | ||
| int | incx, | ||
| const hipblasDoubleComplex *const | y[], | ||
| int | incy, | ||
| hipblasDoubleComplex *const | AP[], | ||
| int | lda, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
gerBatched,geruBatched,gercBatched performs a batch of the matrix-vector operations
A := A + alpha*x*y**T , OR A := A + alpha*x*y**H for gerc
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha is a scalar, x_i and y_i are vectors and A_i is an m by n matrix, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | m | [int] the number of rows of each matrix A_i. |
| [in] | n | [int] the number of columns of eaceh matrix A_i. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each vector x_i. |
| [in] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each vector y_i. |
| [in,out] | AP | device array of device pointers storing each matrix A_i. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasZgercStridedBatched | ( | hipblasHandle_t | handle, |
| int | m, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const hipblasDoubleComplex * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
gerStridedBatched,geruStridedBatched,gercStridedBatched performs the matrix-vector operations
A_i := A_i + alpha*x_i*y_i**T, OR A_i := A_i + alpha*x_i*y_i**H for gerc
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha is a scalar, x_i and y_i are vectors and A_i is an m by n matrix, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | m | [int] the number of rows of each matrix A_i. |
| [in] | n | [int] the number of columns of each matrix A_i. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer to the first vector (x_1) in the batch. |
| [in] | incx | [int] specifies the increments for the elements of each vector x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stridex, however the user should take care to ensure that stridex is of appropriate size, for a typical case this means stridex >= m * incx. |
| [in,out] | y | device pointer to the first vector (y_1) in the batch. |
| [in] | incy | [int] specifies the increment for the elements of each vector y_i. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1). There are no restrictions placed on stridey, however the user should take care to ensure that stridey is of appropriate size, for a typical case this means stridey >= n * incy. |
| [in,out] | AP | device pointer to the first matrix (A_1) in the batch. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasZgeru | ( | hipblasHandle_t | handle, |
| int | m, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| const hipblasDoubleComplex * | y, | ||
| int | incy, | ||
| hipblasDoubleComplex * | AP, | ||
| int | lda | ||
| ) |
BLAS Level 2 API.
ger,geru,gerc performs the matrix-vector operations
A := A + alpha*x*y**T , OR A := A + alpha*x*y**H for gerc
where alpha is a scalar, x and y are vectors, and A is an m by n matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | m | [int] the number of rows of the matrix A. |
| [in] | n | [int] the number of columns of the matrix A. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in,out] | AP | device pointer storing matrix A. |
| [in] | lda | [int] specifies the leading dimension of A. |
| hipblasStatus_t hipblasZgeruBatched | ( | hipblasHandle_t | handle, |
| int | m, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex *const | x[], | ||
| int | incx, | ||
| const hipblasDoubleComplex *const | y[], | ||
| int | incy, | ||
| hipblasDoubleComplex *const | AP[], | ||
| int | lda, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
gerBatched,geruBatched,gercBatched performs a batch of the matrix-vector operations
A := A + alpha*x*y**T , OR A := A + alpha*x*y**H for gerc
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha is a scalar, x_i and y_i are vectors and A_i is an m by n matrix, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | m | [int] the number of rows of each matrix A_i. |
| [in] | n | [int] the number of columns of eaceh matrix A_i. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each vector x_i. |
| [in] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each vector y_i. |
| [in,out] | AP | device array of device pointers storing each matrix A_i. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasZgeruStridedBatched | ( | hipblasHandle_t | handle, |
| int | m, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const hipblasDoubleComplex * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
gerStridedBatched,geruStridedBatched,gercStridedBatched performs the matrix-vector operations
A_i := A_i + alpha*x_i*y_i**T, OR A_i := A_i + alpha*x_i*y_i**H for gerc
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha is a scalar, x_i and y_i are vectors and A_i is an m by n matrix, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | m | [int] the number of rows of each matrix A_i. |
| [in] | n | [int] the number of columns of each matrix A_i. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer to the first vector (x_1) in the batch. |
| [in] | incx | [int] specifies the increments for the elements of each vector x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stridex, however the user should take care to ensure that stridex is of appropriate size, for a typical case this means stridex >= m * incx. |
| [in,out] | y | device pointer to the first vector (y_1) in the batch. |
| [in] | incy | [int] specifies the increment for the elements of each vector y_i. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1). There are no restrictions placed on stridey, however the user should take care to ensure that stridey is of appropriate size, for a typical case this means stridey >= n * incy. |
| [in,out] | AP | device pointer to the first matrix (A_1) in the batch. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasZgetrf | ( | hipblasHandle_t | handle, |
| const int | n, | ||
| hipblasDoubleComplex * | A, | ||
| const int | lda, | ||
| int * | ipiv, | ||
| int * | info | ||
| ) |
SOLVER API.
getrf computes the LU factorization of a general n-by-n matrix A using partial pivoting with row interchanges. The LU factorization can be done without pivoting if ipiv is passed as a nullptr.
In the case that ipiv is not null, the factorization has the form:
\[ A = PLU \]
where P is a permutation matrix, L is lower triangular with unit diagonal elements, and U is upper triangular.
In the case that ipiv is null, the factorization is done without pivoting:
\[ A = LU \]
| [in] | handle | hipblasHandle_t. |
| [in] | n | int. n >= 0. The number of columns and rows of the matrix A. |
| [in,out] | A | pointer to type. Array on the GPU of dimension lda*n. On entry, the n-by-n matrix A to be factored. On exit, the factors L and U from the factorization. The unit diagonal elements of L are not stored. |
| [in] | lda | int. lda >= n. Specifies the leading dimension of A. |
| [out] | ipiv | pointer to int. Array on the GPU of dimension n. The vector of pivot indices. Elements of ipiv are 1-based indices. For 1 <= i <= n, the row i of the matrix was interchanged with row ipiv[i]. Matrix P of the factorization can be derived from ipiv. The factorization here can be done without pivoting if ipiv is passed in as a nullptr. |
| [out] | info | pointer to a int on the GPU. If info = 0, successful exit. If info = j > 0, U is singular. U[j,j] is the first zero pivot. |
| hipblasStatus_t hipblasZgetrfBatched | ( | hipblasHandle_t | handle, |
| const int | n, | ||
| hipblasDoubleComplex *const | A[], | ||
| const int | lda, | ||
| int * | ipiv, | ||
| int * | info, | ||
| const int | batchCount | ||
| ) |
SOLVER API.
getrfBatched computes the LU factorization of a batch of general n-by-n matrices using partial pivoting with row interchanges. The LU factorization can be done without pivoting if ipiv is passed as a nullptr.
In the case that ipiv is not null, the factorization of matrix \(A_i\) in the batch has the form:
\[ A_i = P_iL_iU_i \]
where \(P_i\) is a permutation matrix, \(L_i\) is lower triangular with unit diagonal elements, and \(U_i\) is upper triangular.
In the case that ipiv is null, the factorization is done without pivoting:
\[ A_i = L_iU_i \]
| [in] | handle | hipblasHandle_t. |
| [in] | n | int. n >= 0. The number of columns and rows of all matrices A_i in the batch. |
| [in,out] | A | array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n. On entry, the n-by-n matrices A_i to be factored. On exit, the factors L_i and U_i from the factorizations. The unit diagonal elements of L_i are not stored. |
| [in] | lda | int. lda >= n. Specifies the leading dimension of matrices A_i. |
| [out] | ipiv | pointer to int. Array on the GPU. Contains the vectors of pivot indices ipiv_i (corresponding to A_i). Dimension of ipiv_i is n. Elements of ipiv_i are 1-based indices. For each instance A_i in the batch and for 1 <= j <= n, the row j of the matrix A_i was interchanged with row ipiv_i[j]. Matrix P_i of the factorization can be derived from ipiv_i. The factorization here can be done without pivoting if ipiv is passed in as a nullptr. |
| [out] | info | pointer to int. Array of batchCount integers on the GPU. If info[i] = 0, successful exit for factorization of A_i. If info[i] = j > 0, U_i is singular. U_i[j,j] is the first zero pivot. |
| [in] | batchCount | int. batchCount >= 0. Number of matrices in the batch. |
| hipblasStatus_t hipblasZgetrfStridedBatched | ( | hipblasHandle_t | handle, |
| const int | n, | ||
| hipblasDoubleComplex * | A, | ||
| const int | lda, | ||
| const hipblasStride | strideA, | ||
| int * | ipiv, | ||
| const hipblasStride | strideP, | ||
| int * | info, | ||
| const int | batchCount | ||
| ) |
SOLVER API.
getrfStridedBatched computes the LU factorization of a batch of general n-by-n matrices using partial pivoting with row interchanges. The LU factorization can be done without pivoting if ipiv is passed as a nullptr.
In the case that ipiv is not null, the factorization of matrix \(A_i\) in the batch has the form:
\[ A_i = P_iL_iU_i \]
where \(P_i\) is a permutation matrix, \(L_i\) is lower triangular with unit diagonal elements, and \(U_i\) is upper triangular.
In the case that ipiv is null, the factorization is done without pivoting:
\[ A_i = L_iU_i \]
| [in] | handle | hipblasHandle_t. |
| [in] | n | int. n >= 0. The number of columns and rows of all matrices A_i in the batch. |
| [in,out] | A | pointer to type. Array on the GPU (the size depends on the value of strideA). On entry, the n-by-n matrices A_i to be factored. On exit, the factors L_i and U_i from the factorization. The unit diagonal elements of L_i are not stored. |
| [in] | lda | int. lda >= n. Specifies the leading dimension of matrices A_i. |
| [in] | strideA | hipblasStride. Stride from the start of one matrix A_i to the next one A_(i+1). There is no restriction for the value of strideA. Normal use case is strideA >= lda*n |
| [out] | ipiv | pointer to int. Array on the GPU (the size depends on the value of strideP). Contains the vectors of pivots indices ipiv_i (corresponding to A_i). Dimension of ipiv_i is n. Elements of ipiv_i are 1-based indices. For each instance A_i in the batch and for 1 <= j <= n, the row j of the matrix A_i was interchanged with row ipiv_i[j]. Matrix P_i of the factorization can be derived from ipiv_i. The factorization here can be done without pivoting if ipiv is passed in as a nullptr. |
| [in] | strideP | hipblasStride. Stride from the start of one vector ipiv_i to the next one ipiv_(i+1). There is no restriction for the value of strideP. Normal use case is strideP >= n. |
| [out] | info | pointer to int. Array of batchCount integers on the GPU. If info[i] = 0, successful exit for factorization of A_i. If info[i] = j > 0, U_i is singular. U_i[j,j] is the first zero pivot. |
| [in] | batchCount | int. batchCount >= 0. Number of matrices in the batch. |
| hipblasStatus_t hipblasZgetriBatched | ( | hipblasHandle_t | handle, |
| const int | n, | ||
| hipblasDoubleComplex *const | A[], | ||
| const int | lda, | ||
| int * | ipiv, | ||
| hipblasDoubleComplex *const | C[], | ||
| const int | ldc, | ||
| int * | info, | ||
| const int | batchCount | ||
| ) |
SOLVER API.
getriBatched computes the inverse \(C_i = A_i^{-1}\) of a batch of general n-by-n matrices \(A_i\).
The inverse is computed by solving the linear system
\[ A_i C_i = I \]
where I is the identity matrix, and \(A_i\) is factorized as \(A_i = P_i L_i U_i\) as given by getrfBatched.
| [in] | handle | hipblasHandle_t. |
| [in] | n | int. n >= 0. The number of rows and columns of all matrices A_i in the batch. |
| [in] | A | array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n. The factors L_i and U_i of the factorization A_i = P_i*L_i*U_i returned by getrfBatched. |
| [in] | lda | int. lda >= n. Specifies the leading dimension of matrices A_i. |
| [in] | ipiv | pointer to int. Array on the GPU (the size depends on the value of strideP). The pivot indices returned by getrfBatched. ipiv can be passed in as a nullptr, this will assume that getrfBatched was called without partial pivoting. |
| [out] | C | array of pointers to type. Each pointer points to an array on the GPU of dimension ldc*n. If info[i] = 0, the inverse of matrices A_i. Otherwise, undefined. |
| [in] | ldc | int. ldc >= n. Specifies the leading dimension of C_i. |
| [out] | info | pointer to int. Array of batchCount integers on the GPU. If info[i] = 0, successful exit for inversion of A_i. If info[i] = j > 0, U_i is singular. U_i[j,j] is the first zero pivot. |
| [in] | batchCount | int. batchCount >= 0. Number of matrices in the batch. |
| hipblasStatus_t hipblasZgetrs | ( | hipblasHandle_t | handle, |
| const hipblasOperation_t | trans, | ||
| const int | n, | ||
| const int | nrhs, | ||
| hipblasDoubleComplex * | A, | ||
| const int | lda, | ||
| const int * | ipiv, | ||
| hipblasDoubleComplex * | B, | ||
| const int | ldb, | ||
| int * | info | ||
| ) |
SOLVER API.
getrs solves a system of n linear equations on n variables in its factorized form.
It solves one of the following systems, depending on the value of trans:
\[ \begin{array}{cl} A X = B & \: \text{not transposed,}\\ A^T X = B & \: \text{transposed, or}\\ A^H X = B & \: \text{conjugate transposed.} \end{array} \]
Matrix A is defined by its triangular factors as returned by getrf.
| [in] | handle | hipblasHandle_t. |
| [in] | trans | hipblasOperation_t. Specifies the form of the system of equations. |
| [in] | n | int. n >= 0. The order of the system, i.e. the number of columns and rows of A. |
| [in] | nrhs | int. nrhs >= 0. The number of right hand sides, i.e., the number of columns of the matrix B. |
| [in] | A | pointer to type. Array on the GPU of dimension lda*n. The factors L and U of the factorization A = P*L*U returned by getrf. |
| [in] | lda | int. lda >= n. The leading dimension of A. |
| [in] | ipiv | pointer to int. Array on the GPU of dimension n. The pivot indices returned by getrf. |
| [in,out] | B | pointer to type. Array on the GPU of dimension ldb*nrhs. On entry, the right hand side matrix B. On exit, the solution matrix X. |
| [in] | ldb | int. ldb >= n. The leading dimension of B. |
| [out] | info | pointer to a int on the host. If info = 0, successful exit. If info = j < 0, the argument at position -j is invalid. |
| hipblasStatus_t hipblasZgetrsBatched | ( | hipblasHandle_t | handle, |
| const hipblasOperation_t | trans, | ||
| const int | n, | ||
| const int | nrhs, | ||
| hipblasDoubleComplex *const | A[], | ||
| const int | lda, | ||
| const int * | ipiv, | ||
| hipblasDoubleComplex *const | B[], | ||
| const int | ldb, | ||
| int * | info, | ||
| const int | batchCount | ||
| ) |
SOLVER API.
getrsBatched solves a batch of systems of n linear equations on n variables in its factorized forms.
For each instance i in the batch, it solves one of the following systems, depending on the value of trans:
\[ \begin{array}{cl} A_i X_i = B_i & \: \text{not transposed,}\\ A_i^T X_i = B_i & \: \text{transposed, or}\\ A_i^H X_i = B_i & \: \text{conjugate transposed.} \end{array} \]
Matrix \(A_i\) is defined by its triangular factors as returned by getrfBatched.
| [in] | handle | hipblasHandle_t. |
| [in] | trans | hipblasOperation_t. Specifies the form of the system of equations of each instance in the batch. |
| [in] | n | int. n >= 0. The order of the system, i.e. the number of columns and rows of all A_i matrices. |
| [in] | nrhs | int. nrhs >= 0. The number of right hand sides, i.e., the number of columns of all the matrices B_i. |
| [in] | A | Array of pointers to type. Each pointer points to an array on the GPU of dimension lda*n. The factors L_i and U_i of the factorization A_i = P_i*L_i*U_i returned by getrfBatched. |
| [in] | lda | int. lda >= n. The leading dimension of matrices A_i. |
| [in] | ipiv | pointer to int. Array on the GPU. Contains the vectors ipiv_i of pivot indices returned by getrfBatched. |
| [in,out] | B | Array of pointers to type. Each pointer points to an array on the GPU of dimension ldb*nrhs. On entry, the right hand side matrices B_i. On exit, the solution matrix X_i of each system in the batch. |
| [in] | ldb | int. ldb >= n. The leading dimension of matrices B_i. |
| [out] | info | pointer to a int on the host. If info = 0, successful exit. If info = j < 0, the argument at position -j is invalid. |
| [in] | batchCount | int. batchCount >= 0. Number of instances (systems) in the batch. |
| hipblasStatus_t hipblasZgetrsStridedBatched | ( | hipblasHandle_t | handle, |
| const hipblasOperation_t | trans, | ||
| const int | n, | ||
| const int | nrhs, | ||
| hipblasDoubleComplex * | A, | ||
| const int | lda, | ||
| const hipblasStride | strideA, | ||
| const int * | ipiv, | ||
| const hipblasStride | strideP, | ||
| hipblasDoubleComplex * | B, | ||
| const int | ldb, | ||
| const hipblasStride | strideB, | ||
| int * | info, | ||
| const int | batchCount | ||
| ) |
SOLVER API.
getrsStridedBatched solves a batch of systems of n linear equations on n variables in its factorized forms.
For each instance i in the batch, it solves one of the following systems, depending on the value of trans:
\[ \begin{array}{cl} A_i X_i = B_i & \: \text{not transposed,}\\ A_i^T X_i = B_i & \: \text{transposed, or}\\ A_i^H X_i = B_i & \: \text{conjugate transposed.} \end{array} \]
Matrix \(A_i\) is defined by its triangular factors as returned by getrfStridedBatched.
| [in] | handle | hipblasHandle_t. |
| [in] | trans | hipblasOperation_t. Specifies the form of the system of equations of each instance in the batch. |
| [in] | n | int. n >= 0. The order of the system, i.e. the number of columns and rows of all A_i matrices. |
| [in] | nrhs | int. nrhs >= 0. The number of right hand sides, i.e., the number of columns of all the matrices B_i. |
| [in] | A | pointer to type. Array on the GPU (the size depends on the value of strideA). The factors L_i and U_i of the factorization A_i = P_i*L_i*U_i returned by getrfStridedBatched. |
| [in] | lda | int. lda >= n. The leading dimension of matrices A_i. |
| [in] | strideA | hipblasStride. Stride from the start of one matrix A_i to the next one A_(i+1). There is no restriction for the value of strideA. Normal use case is strideA >= lda*n. |
| [in] | ipiv | pointer to int. Array on the GPU (the size depends on the value of strideP). Contains the vectors ipiv_i of pivot indices returned by getrfStridedBatched. |
| [in] | strideP | hipblasStride. Stride from the start of one vector ipiv_i to the next one ipiv_(i+1). There is no restriction for the value of strideP. Normal use case is strideP >= n. |
| [in,out] | B | pointer to type. Array on the GPU (size depends on the value of strideB). On entry, the right hand side matrices B_i. On exit, the solution matrix X_i of each system in the batch. |
| [in] | ldb | int. ldb >= n. The leading dimension of matrices B_i. |
| [in] | strideB | hipblasStride. Stride from the start of one matrix B_i to the next one B_(i+1). There is no restriction for the value of strideB. Normal use case is strideB >= ldb*nrhs. |
| [out] | info | pointer to a int on the host. If info = 0, successful exit. If info = j < 0, the argument at position -j is invalid. |
| [in] | batchCount | int. batchCount >= 0. Number of instances (systems) in the batch. |
| hipblasStatus_t hipblasZhbmv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| int | k, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| const hipblasDoubleComplex * | beta, | ||
| hipblasDoubleComplex * | y, | ||
| int | incy | ||
| ) |
BLAS Level 2 API.
hbmv performs the matrix-vector operations
y := alpha*A*x + beta*y
where alpha and beta are scalars, x and y are n element vectors and A is an n by n Hermitian band matrix, with k super-diagonals.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: The upper triangular part of A is being supplied. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of A is being supplied. |
| [in] | n | [int] the order of the matrix A. |
| [in] | k | [int] the number of super-diagonals of the matrix A. Must be >= 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device pointer storing matrix A. Of dimension (lda, n). if uplo == HIPBLAS_FILL_MODE_UPPER: The leading (k + 1) by n part of A must contain the upper triangular band part of the Hermitian matrix, with the leading diagonal in row (k + 1), the first super-diagonal on the RHS of row k, etc. The top left k by x triangle of A will not be referenced. Ex (upper, lda = n = 4, k = 1): A Represented matrix (0,0) (5,9) (6,8) (7,7) (1, 0) (5, 9) (0, 0) (0, 0) (1,0) (2,0) (3,0) (4,0) (5,-9) (2, 0) (6, 8) (0, 0) (0,0) (0,0) (0,0) (0,0) (0, 0) (6,-8) (3, 0) (7, 7) (0,0) (0,0) (0,0) (0,0) (0, 0) (0, 0) (7,-7) (4, 0) |
if uplo == HIPBLAS_FILL_MODE_LOWER: The leading (k + 1) by n part of A must contain the lower triangular band part of the Hermitian matrix, with the leading diagonal in row (1), the first sub-diagonal on the LHS of row 2, etc. The bottom right k by k triangle of A will not be referenced. Ex (lower, lda = 2, n = 4, k = 1): A Represented matrix (1,0) (2,0) (3,0) (4,0) (1, 0) (5,-9) (0, 0) (0, 0) (5,9) (6,8) (7,7) (0,0) (5, 9) (2, 0) (6,-8) (0, 0) (0, 0) (6, 8) (3, 0) (7,-7) (0, 0) (0, 0) (7, 7) (4, 0)
As a Hermitian matrix, the imaginary part of the main diagonal of A will not be referenced and is assumed to be == 0.
| [in] | lda | [int] specifies the leading dimension of A. must be >= k + 1 |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| hipblasStatus_t hipblasZhbmvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| int | k, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex *const | AP[], | ||
| int | lda, | ||
| const hipblasDoubleComplex *const | x[], | ||
| int | incx, | ||
| const hipblasDoubleComplex * | beta, | ||
| hipblasDoubleComplex *const | y[], | ||
| int | incy, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
hbmvBatched performs one of the matrix-vector operations
y_i := alpha*A_i*x_i + beta*y_i
where alpha and beta are scalars, x_i and y_i are n element vectors and A_i is an n by n Hermitian band matrix with k super-diagonals, for each batch in i = [1, batchCount].
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is being supplied. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is being supplied. |
| [in] | n | [int] the order of each matrix A_i. |
| [in] | k | [int] the number of super-diagonals of each matrix A_i. Must be >= 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device array of device pointers storing each matrix_i A of dimension (lda, n). if uplo == HIPBLAS_FILL_MODE_UPPER: The leading (k + 1) by n part of each A_i must contain the upper triangular band part of the Hermitian matrix, with the leading diagonal in row (k + 1), the first super-diagonal on the RHS of row k, etc. The top left k by x triangle of each A_i will not be referenced. Ex (upper, lda = n = 4, k = 1): A Represented matrix (0,0) (5,9) (6,8) (7,7) (1, 0) (5, 9) (0, 0) (0, 0) (1,0) (2,0) (3,0) (4,0) (5,-9) (2, 0) (6, 8) (0, 0) (0,0) (0,0) (0,0) (0,0) (0, 0) (6,-8) (3, 0) (7, 7) (0,0) (0,0) (0,0) (0,0) (0, 0) (0, 0) (7,-7) (4, 0) |
if uplo == HIPBLAS_FILL_MODE_LOWER: The leading (k + 1) by n part of each A_i must contain the lower triangular band part of the Hermitian matrix, with the leading diagonal in row (1), the first sub-diagonal on the LHS of row 2, etc. The bottom right k by k triangle of each A_i will not be referenced. Ex (lower, lda = 2, n = 4, k = 1): A Represented matrix (1,0) (2,0) (3,0) (4,0) (1, 0) (5,-9) (0, 0) (0, 0) (5,9) (6,8) (7,7) (0,0) (5, 9) (2, 0) (6,-8) (0, 0) (0, 0) (6, 8) (3, 0) (7,-7) (0, 0) (0, 0) (7, 7) (4, 0)
As a Hermitian matrix, the imaginary part of the main diagonal of each A_i will not be referenced and is assumed to be == 0.
| [in] | lda | [int] specifies the leading dimension of each A_i. must be >= max(1, n) |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZhbmvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| int | k, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const hipblasDoubleComplex * | beta, | ||
| hipblasDoubleComplex * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
hbmvStridedBatched performs one of the matrix-vector operations
y_i := alpha*A_i*x_i + beta*y_i
where alpha and beta are scalars, x_i and y_i are n element vectors and A_i is an n by n Hermitian band matrix with k super-diagonals, for each batch in i = [1, batchCount].
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is being supplied. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is being supplied. |
| [in] | n | [int] the order of each matrix A_i. |
| [in] | k | [int] the number of super-diagonals of each matrix A_i. Must be >= 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device array pointing to the first matrix A_1. Each A_i is of dimension (lda, n). if uplo == HIPBLAS_FILL_MODE_UPPER: The leading (k + 1) by n part of each A_i must contain the upper triangular band part of the Hermitian matrix, with the leading diagonal in row (k + 1), the first super-diagonal on the RHS of row k, etc. The top left k by x triangle of each A_i will not be referenced. Ex (upper, lda = n = 4, k = 1): A Represented matrix (0,0) (5,9) (6,8) (7,7) (1, 0) (5, 9) (0, 0) (0, 0) (1,0) (2,0) (3,0) (4,0) (5,-9) (2, 0) (6, 8) (0, 0) (0,0) (0,0) (0,0) (0,0) (0, 0) (6,-8) (3, 0) (7, 7) (0,0) (0,0) (0,0) (0,0) (0, 0) (0, 0) (7,-7) (4, 0) |
if uplo == HIPBLAS_FILL_MODE_LOWER: The leading (k + 1) by n part of each A_i must contain the lower triangular band part of the Hermitian matrix, with the leading diagonal in row (1), the first sub-diagonal on the LHS of row 2, etc. The bottom right k by k triangle of each A_i will not be referenced. Ex (lower, lda = 2, n = 4, k = 1): A Represented matrix (1,0) (2,0) (3,0) (4,0) (1, 0) (5,-9) (0, 0) (0, 0) (5,9) (6,8) (7,7) (0,0) (5, 9) (2, 0) (6,-8) (0, 0) (0, 0) (6, 8) (3, 0) (7,-7) (0, 0) (0, 0) (7, 7) (4, 0)
As a Hermitian matrix, the imaginary part of the main diagonal of each A_i will not be referenced and is assumed to be == 0.
| [in] | lda | [int] specifies the leading dimension of each A_i. must be >= max(1, n) |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | x | device array pointing to the first vector y_1. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1) |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device array pointing to the first vector y_1. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZhemm | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| int | k, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| const hipblasDoubleComplex * | BP, | ||
| int | ldb, | ||
| const hipblasDoubleComplex * | beta, | ||
| hipblasDoubleComplex * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
hemm performs one of the matrix-matrix operations:
C := alpha*A*B + beta*C if side == HIPBLAS_SIDE_LEFT, C := alpha*B*A + beta*C if side == HIPBLAS_SIDE_RIGHT,
where alpha and beta are scalars, B and C are m by n matrices, and A is a Hermitian matrix stored as either upper or lower.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] HIPBLAS_SIDE_LEFT: C := alpha*A*B + beta*C HIPBLAS_SIDE_RIGHT: C := alpha*B*A + beta*C |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix |
| [in] | n | [int] n specifies the number of rows of B and C. n >= 0. |
| [in] | k | [int] n specifies the number of columns of B and C. k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A and B are not referenced. |
| [in] | AP | pointer storing matrix A on the GPU. A is m by m if side == HIPBLAS_SIDE_LEFT A is n by n if side == HIPBLAS_SIDE_RIGHT Only the upper/lower triangular part is accessed. The imaginary component of the diagonal elements is not used. |
| [in] | lda | [int] lda specifies the first dimension of A. if side = HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), otherwise lda >= max( 1, n ). |
| [in] | BP | pointer storing matrix B on the GPU. Matrix dimension is m by n |
| [in] | ldb | [int] ldb specifies the first dimension of B. ldb >= max( 1, m ) |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | pointer storing matrix C on the GPU. Matrix dimension is m by n |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, m ) |
| hipblasStatus_t hipblasZhemmBatched | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| int | k, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex *const | AP[], | ||
| int | lda, | ||
| const hipblasDoubleComplex *const | BP[], | ||
| int | ldb, | ||
| const hipblasDoubleComplex * | beta, | ||
| hipblasDoubleComplex *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
hemmBatched performs a batch of the matrix-matrix operations:
C_i := alpha*A_i*B_i + beta*C_i if side == HIPBLAS_SIDE_LEFT, C_i := alpha*B_i*A_i + beta*C_i if side == HIPBLAS_SIDE_RIGHT,
where alpha and beta are scalars, B_i and C_i are m by n matrices, and A_i is a Hermitian matrix stored as either upper or lower.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] HIPBLAS_SIDE_LEFT: C_i := alpha*A_i*B_i + beta*C_i HIPBLAS_SIDE_RIGHT: C_i := alpha*B_i*A_i + beta*C_i |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: A_i is a lower triangular matrix |
| [in] | n | [int] n specifies the number of rows of B_i and C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of B_i and C_i. k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A_i and B_i are not referenced. |
| [in] | AP | device array of device pointers storing each matrix A_i on the GPU. A_i is m by m if side == HIPBLAS_SIDE_LEFT A_i is n by n if side == HIPBLAS_SIDE_RIGHT Only the upper/lower triangular part is accessed. The imaginary component of the diagonal elements is not used. |
| [in] | lda | [int] lda specifies the first dimension of A_i. if side = HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), otherwise lda >= max( 1, n ). |
| [in] | BP | device array of device pointers storing each matrix B_i on the GPU. Matrix dimension is m by n |
| [in] | ldb | [int] ldb specifies the first dimension of B_i. ldb >= max( 1, m ) |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C_i need not be set before entry. |
| [in] | CP | device array of device pointers storing each matrix C_i on the GPU. Matrix dimension is m by n |
| [in] | ldc | [int] ldc specifies the first dimension of C_i. ldc >= max( 1, m ) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZhemmStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| int | k, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const hipblasDoubleComplex * | BP, | ||
| int | ldb, | ||
| hipblasStride | strideB, | ||
| const hipblasDoubleComplex * | beta, | ||
| hipblasDoubleComplex * | CP, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
hemmStridedBatched performs a batch of the matrix-matrix operations:
C_i := alpha*A_i*B_i + beta*C_i if side == HIPBLAS_SIDE_LEFT, C_i := alpha*B_i*A_i + beta*C_i if side == HIPBLAS_SIDE_RIGHT,
where alpha and beta are scalars, B_i and C_i are m by n matrices, and A_i is a Hermitian matrix stored as either upper or lower.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] HIPBLAS_SIDE_LEFT: C_i := alpha*A_i*B_i + beta*C_i HIPBLAS_SIDE_RIGHT: C_i := alpha*B_i*A_i + beta*C_i |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: A_i is a lower triangular matrix |
| [in] | n | [int] n specifies the number of rows of B_i and C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of B_i and C_i. k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A_i and B_i are not referenced. |
| [in] | AP | device pointer to first matrix A_1 A_i is m by m if side == HIPBLAS_SIDE_LEFT A_i is n by n if side == HIPBLAS_SIDE_RIGHT Only the upper/lower triangular part is accessed. The imaginary component of the diagonal elements is not used. |
| [in] | lda | [int] lda specifies the first dimension of A_i. if side = HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), otherwise lda >= max( 1, n ). |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | BP | device pointer to first matrix B_1 of dimension (ldb, n) on the GPU |
| [in] | ldb | [int] ldb specifies the first dimension of B_i. if side = HIPBLAS_OP_N, ldb >= max( 1, m ), otherwise ldb >= max( 1, n ). |
| [in] | strideB | [hipblasStride] stride from the start of one matrix (B_i) and the next one (B_i+1) |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | device pointer to first matrix C_1 of dimension (ldc, n) on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, m ) |
| [in,out] | strideC | [hipblasStride] stride from the start of one matrix (C_i) and the next one (C_i+1) |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasZhemv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| const hipblasDoubleComplex * | beta, | ||
| hipblasDoubleComplex * | y, | ||
| int | incy | ||
| ) |
BLAS Level 2 API.
hemv performs one of the matrix-vector operations
y := alpha*A*x + beta*y
where alpha and beta are scalars, x and y are n element vectors and A is an n by n Hermitian matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: the upper triangular part of the Hermitian matrix A is supplied. HIPBLAS_FILL_MODE_LOWER: the lower triangular part of the Hermitian matrix A is supplied. |
| [in] | n | [int] the order of the matrix A. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device pointer storing matrix A. Of dimension (lda, n). if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular part of A must contain the upper triangular part of a Hermitian matrix. The lower triangular part of A will not be referenced. if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular part of A must contain the lower triangular part of a Hermitian matrix. The upper triangular part of A will not be referenced. As a Hermitian matrix, the imaginary part of the main diagonal of A will not be referenced and is assumed to be == 0. |
| [in] | lda | [int] specifies the leading dimension of A. must be >= max(1, n) |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| hipblasStatus_t hipblasZhemvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex *const | AP[], | ||
| int | lda, | ||
| const hipblasDoubleComplex *const | x[], | ||
| int | incx, | ||
| const hipblasDoubleComplex * | beta, | ||
| hipblasDoubleComplex *const | y[], | ||
| int | incy, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
hemvBatched performs one of the matrix-vector operations
y_i := alpha*A_i*x_i + beta*y_i
where alpha and beta are scalars, x_i and y_i are n element vectors and A_i is an n by n Hermitian matrix, for each batch in i = [1, batchCount].
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: the upper triangular part of the Hermitian matrix A is supplied. HIPBLAS_FILL_MODE_LOWER: the lower triangular part of the Hermitian matrix A is supplied. |
| [in] | n | [int] the order of each matrix A_i. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device array of device pointers storing each matrix A_i of dimension (lda, n). if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i must contain the upper triangular part of a Hermitian matrix. The lower triangular part of each A_i will not be referenced. if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i must contain the lower triangular part of a Hermitian matrix. The upper triangular part of each A_i will not be referenced. As a Hermitian matrix, the imaginary part of the main diagonal of each A_i will not be referenced and is assumed to be == 0. |
| [in] | lda | [int] specifies the leading dimension of each A_i. must be >= max(1, n) |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZhemvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const hipblasDoubleComplex * | beta, | ||
| hipblasDoubleComplex * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
hemvStridedBatched performs one of the matrix-vector operations
y_i := alpha*A_i*x_i + beta*y_i
where alpha and beta are scalars, x_i and y_i are n element vectors and A_i is an n by n Hermitian matrix, for each batch in i = [1, batchCount].
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: the upper triangular part of the Hermitian matrix A is supplied. HIPBLAS_FILL_MODE_LOWER: the lower triangular part of the Hermitian matrix A is supplied. |
| [in] | n | [int] the order of each matrix A_i. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device array of device pointers storing each matrix A_i of dimension (lda, n). if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i must contain the upper triangular part of a Hermitian matrix. The lower triangular part of each A_i will not be referenced. if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i must contain the lower triangular part of a Hermitian matrix. The upper triangular part of each A_i will not be referenced. As a Hermitian matrix, the imaginary part of the main diagonal of each A_i will not be referenced and is assumed to be == 0. |
| [in] | lda | [int] specifies the leading dimension of each A_i. must be >= max(1, n) |
| [in] | strideA | [hipblasStride] stride from the start of one (A_i) to the next (A_i+1) |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1). |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZher | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const double * | alpha, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasDoubleComplex * | AP, | ||
| int | lda | ||
| ) |
BLAS Level 2 API.
her performs the matrix-vector operations
A := A + alpha*x*x**H
where alpha is a real scalar, x is a vector, and A is an n by n Hermitian matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of A is supplied in A. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of A is supplied in A. |
| [in] | n | [int] the number of rows and columns of matrix A, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in,out] | AP | device pointer storing the specified triangular portion of the Hermitian matrix A. Of size (lda * n). if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of the Hermitian matrix A is supplied. The lower triangluar portion will not be touched. if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of the Hermitian matrix A is supplied. The upper triangular portion will not be touched. Note that the imaginary part of the diagonal elements are not accessed and are assumed to be 0. |
| [in] | lda | [int] specifies the leading dimension of A. Must be at least max(1, n). |
| hipblasStatus_t hipblasZher2 | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| const hipblasDoubleComplex * | y, | ||
| int | incy, | ||
| hipblasDoubleComplex * | AP, | ||
| int | lda | ||
| ) |
BLAS Level 2 API.
her2 performs the matrix-vector operations
A := A + alpha*x*y**H + conj(alpha)*y*x**H
where alpha is a complex scalar, x and y are vectors, and A is an n by n Hermitian matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of A is supplied. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of A is supplied. |
| [in] | n | [int] the number of rows and columns of matrix A, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in,out] | AP | device pointer storing the specified triangular portion of the Hermitian matrix A. Of size (lda, n). if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of the Hermitian matrix A is supplied. The lower triangular portion of A will not be touched. if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of the Hermitian matrix A is supplied. The upper triangular portion of A will not be touched. Note that the imaginary part of the diagonal elements are not accessed and are assumed to be 0. |
| [in] | lda | [int] specifies the leading dimension of A. Must be at least max(lda, 1). |
| hipblasStatus_t hipblasZher2Batched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex *const | x[], | ||
| int | incx, | ||
| const hipblasDoubleComplex *const | y[], | ||
| int | incy, | ||
| hipblasDoubleComplex *const | AP[], | ||
| int | lda, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
her2Batched performs the matrix-vector operations
A_i := A_i + alpha*x_i*y_i**H + conj(alpha)*y_i*x_i**H
where alpha is a complex scalar, x_i and y_i are vectors, and A_i is an n by n Hermitian matrix for each batch in i = [1, batchCount].
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied. |
| [in] | n | [int] the number of rows and columns of each matrix A_i, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in,out] | AP | device array of device pointers storing the specified triangular portion of each Hermitian matrix A_i of size (lda, n). if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of each Hermitian matrix A_i is supplied. The lower triangular portion of each A_i will not be touched. if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of each Hermitian matrix A_i is supplied. The upper triangular portion of each A_i will not be touched. Note that the imaginary part of the diagonal elements are not accessed and are assumed to be 0. |
| [in] | lda | [int] specifies the leading dimension of each A_i. Must be at least max(lda, 1). |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZher2k | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| const hipblasDoubleComplex * | BP, | ||
| int | ldb, | ||
| const double * | beta, | ||
| hipblasDoubleComplex * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
her2k performs one of the matrix-matrix operations for a Hermitian rank-2k update
C := alpha*op( A )*op( B )^H + conj(alpha)*op( B )*op( A )^H + beta*C
where alpha and beta are scalars, op(A) and op(B) are n by k matrices, and C is a n x n Hermitian matrix stored as either upper or lower.
op( A ) = A, op( B ) = B, and A and B are n by k if trans == HIPBLAS_OP_N op( A ) = A^H, op( B ) = B^H, and A and B are k by n if trans == HIPBLAS_OP_C
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_C: op( A ) = A^H, op( B ) = B^H HIPBLAS_OP_N: op( A ) = A, op( B ) = B |
| [in] | n | [int] n specifies the number of rows and columns of C. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | pointer storing matrix A on the GPU. Martrix dimension is ( lda, k ) when if trans = HIPBLAS_OP_N, otherwise (lda, n) only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of A. if trans = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | BP | pointer storing matrix B on the GPU. Martrix dimension is ( ldb, k ) when if trans = HIPBLAS_OP_N, otherwise (ldb, n) only the upper/lower triangular part is accessed. |
| [in] | ldb | [int] ldb specifies the first dimension of B. if trans = HIPBLAS_OP_N, ldb >= max( 1, n ), otherwise ldb >= max( 1, k ). |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | pointer storing matrix C on the GPU. The imaginary component of the diagonal elements are not used but are set to zero unless quick return. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| hipblasStatus_t hipblasZher2kBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex *const | AP[], | ||
| int | lda, | ||
| const hipblasDoubleComplex *const | BP[], | ||
| int | ldb, | ||
| const double * | beta, | ||
| hipblasDoubleComplex *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
her2kBatched performs a batch of the matrix-matrix operations for a Hermitian rank-2k update
C_i := alpha*op( A_i )*op( B_i )^H + conj(alpha)*op( B_i )*op( A_i )^H + beta*C_i
where alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrices, and C_i is a n x n Hermitian matrix stored as either upper or lower.
op( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == HIPBLAS_OP_N op( A_i ) = A_i^H, op( B_i ) = B_i^H, and A_i and B_i are k by n if trans == HIPBLAS_OP_C
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C_i is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_C: op(A) = A^H HIPBLAS_OP_N: op(A) = A |
| [in] | n | [int] n specifies the number of rows and columns of C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | device array of device pointers storing each matrix_i A of dimension (lda, k) when trans is HIPBLAS_OP_N, otherwise of dimension (lda, n) |
| [in] | lda | [int] lda specifies the first dimension of A_i. if trans = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | BP | device array of device pointers storing each matrix_i B of dimension (ldb, k) when trans is HIPBLAS_OP_N, otherwise of dimension (ldb, n) |
| [in] | ldb | [int] ldb specifies the first dimension of B_i. if trans = HIPBLAS_OP_N, ldb >= max( 1, n ), otherwise ldb >= max( 1, k ). |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | device array of device pointers storing each matrix C_i on the GPU. The imaginary component of the diagonal elements are not used but are set to zero unless quick return. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZher2kStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const hipblasDoubleComplex * | BP, | ||
| int | ldb, | ||
| hipblasStride | strideB, | ||
| const double * | beta, | ||
| hipblasDoubleComplex * | CP, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
her2kStridedBatched performs a batch of the matrix-matrix operations for a Hermitian rank-2k update
C_i := alpha*op( A_i )*op( B_i )^H + conj(alpha)*op( B_i )*op( A_i )^H + beta*C_i
where alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrices, and C_i is a n x n Hermitian matrix stored as either upper or lower.
op( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == HIPBLAS_OP_N op( A_i ) = A_i^H, op( B_i ) = B_i^H, and A_i and B_i are k by n if trans == HIPBLAS_OP_C
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C_i is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_C: op( A_i ) = A_i^H, op( B_i ) = B_i^H HIPBLAS_OP_N: op( A_i ) = A_i, op( B_i ) = B_i |
| [in] | n | [int] n specifies the number of rows and columns of C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | Device pointer to the first matrix A_1 on the GPU of dimension (lda, k) when trans is HIPBLAS_OP_N, otherwise of dimension (lda, n) |
| [in] | lda | [int] lda specifies the first dimension of A_i. if trans = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | BP | Device pointer to the first matrix B_1 on the GPU of dimension (ldb, k) when trans is HIPBLAS_OP_N, otherwise of dimension (ldb, n) |
| [in] | ldb | [int] ldb specifies the first dimension of B_i. if trans = HIPBLAS_OP_N, ldb >= max( 1, n ), otherwise ldb >= max( 1, k ). |
| [in] | strideB | [hipblasStride] stride from the start of one matrix (B_i) and the next one (B_i+1) |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | Device pointer to the first matrix C_1 on the GPU. The imaginary component of the diagonal elements are not used but are set to zero unless quick return. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| [in,out] | strideC | [hipblasStride] stride from the start of one matrix (C_i) and the next one (C_i+1) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZher2StridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const hipblasDoubleComplex * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
her2StridedBatched performs the matrix-vector operations
A_i := A_i + alpha*x_i*y_i**H + conj(alpha)*y_i*x_i**H
where alpha is a complex scalar, x_i and y_i are vectors, and A_i is an n by n Hermitian matrix for each batch in i = [1, batchCount].
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied. |
| [in] | n | [int] the number of rows and columns of each matrix A_i, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer pointing to the first vector x_1. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] specifies the stride between the beginning of one vector (x_i) and the next (x_i+1). |
| [in] | y | device pointer pointing to the first vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | stridey | [hipblasStride] specifies the stride between the beginning of one vector (y_i) and the next (y_i+1). |
| [in,out] | AP | device pointer pointing to the first matrix (A_1). Stores the specified triangular portion of each Hermitian matrix A_i. if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of each Hermitian matrix A_i is supplied. The lower triangular portion of each A_i will not be touched. if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of each Hermitian matrix A_i is supplied. The upper triangular portion of each A_i will not be touched. Note that the imaginary part of the diagonal elements are not accessed and are assumed to be 0. |
| [in] | lda | [int] specifies the leading dimension of each A_i. Must be at least max(lda, 1). |
| [in] | strideA | [hipblasStride] specifies the stride between the beginning of one matrix (A_i) and the next (A_i+1). |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZherBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const double * | alpha, | ||
| const hipblasDoubleComplex *const | x[], | ||
| int | incx, | ||
| hipblasDoubleComplex *const | AP[], | ||
| int | lda, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
herBatched performs the matrix-vector operations
A_i := A_i + alpha*x_i*x_i**H
where alpha is a real scalar, x_i is a vector, and A_i is an n by n symmetric matrix, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied in A. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied in A. |
| [in] | n | [int] the number of rows and columns of each matrix A_i, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in,out] | AP | device array of device pointers storing the specified triangular portion of each Hermitian matrix A_i of at least size ((n * (n + 1)) / 2). Array is of at least size batchCount. if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of each Hermitian matrix A_i is supplied. The lower triangular portion of each A_i will not be touched. if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of each Hermitian matrix A_i is supplied. The upper triangular portion of each A_i will not be touched. Note that the imaginary part of the diagonal elements are not accessed and are assumed to be 0. |
| [in] | lda | [int] specifies the leading dimension of each A_i. Must be at least max(1, n). |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZherk | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const double * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| const double * | beta, | ||
| hipblasDoubleComplex * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
herk performs one of the matrix-matrix operations for a Hermitian rank-k update
C := alpha*op( A )*op( A )^H + beta*C
where alpha and beta are scalars, op(A) is an n by k matrix, and C is a n x n Hermitian matrix stored as either upper or lower.
op( A ) = A, and A is n by k if transA == HIPBLAS_OP_N op( A ) = A^H and A is k by n if transA == HIPBLAS_OP_C
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_C: op(A) = A^H HIPBLAS_ON_N: op(A) = A |
| [in] | n | [int] n specifies the number of rows and columns of C. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | pointer storing matrix A on the GPU. Martrix dimension is ( lda, k ) when if transA = HIPBLAS_OP_N, otherwise (lda, n) only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of A. if transA = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | pointer storing matrix C on the GPU. The imaginary component of the diagonal elements are not used but are set to zero unless quick return. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| hipblasStatus_t hipblasZherkBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const double * | alpha, | ||
| const hipblasDoubleComplex *const | AP[], | ||
| int | lda, | ||
| const double * | beta, | ||
| hipblasDoubleComplex *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
herkBatched performs a batch of the matrix-matrix operations for a Hermitian rank-k update
C_i := alpha*op( A_i )*op( A_i )^H + beta*C_i
where alpha and beta are scalars, op(A) is an n by k matrix, and C_i is a n x n Hermitian matrix stored as either upper or lower.
op( A_i ) = A_i, and A_i is n by k if transA == HIPBLAS_OP_N op( A_i ) = A_i^H and A_i is k by n if transA == HIPBLAS_OP_C
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C_i is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_C: op(A) = A^H HIPBLAS_OP_N: op(A) = A |
| [in] | n | [int] n specifies the number of rows and columns of C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | device array of device pointers storing each matrix_i A of dimension (lda, k) when transA is HIPBLAS_OP_N, otherwise of dimension (lda, n) |
| [in] | lda | [int] lda specifies the first dimension of A_i. if transA = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | device array of device pointers storing each matrix C_i on the GPU. The imaginary component of the diagonal elements are not used but are set to zero unless quick return. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZherkStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const double * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const double * | beta, | ||
| hipblasDoubleComplex * | CP, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
herkStridedBatched performs a batch of the matrix-matrix operations for a Hermitian rank-k update
C_i := alpha*op( A_i )*op( A_i )^H + beta*C_i
where alpha and beta are scalars, op(A) is an n by k matrix, and C_i is a n x n Hermitian matrix stored as either upper or lower.
op( A_i ) = A_i, and A_i is n by k if transA == HIPBLAS_OP_N op( A_i ) = A_i^H and A_i is k by n if transA == HIPBLAS_OP_C
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C_i is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_C: op(A) = A^H HIPBLAS_OP_N: op(A) = A |
| [in] | n | [int] n specifies the number of rows and columns of C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | Device pointer to the first matrix A_1 on the GPU of dimension (lda, k) when transA is HIPBLAS_OP_N, otherwise of dimension (lda, n) |
| [in] | lda | [int] lda specifies the first dimension of A_i. if transA = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | Device pointer to the first matrix C_1 on the GPU. The imaginary component of the diagonal elements are not used but are set to zero unless quick return. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| [in,out] | strideC | [hipblasStride] stride from the start of one matrix (C_i) and the next one (C_i+1) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZherkx | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| const hipblasDoubleComplex * | BP, | ||
| int | ldb, | ||
| const double * | beta, | ||
| hipblasDoubleComplex * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
herkx performs one of the matrix-matrix operations for a Hermitian rank-k update
C := alpha*op( A )*op( B )^H + beta*C
where alpha and beta are scalars, op(A) and op(B) are n by k matrices, and C is a n x n Hermitian matrix stored as either upper or lower. This routine should only be used when the caller can guarantee that the result of op( A )*op( B )^T will be Hermitian.
op( A ) = A, op( B ) = B, and A and B are n by k if trans == HIPBLAS_OP_N op( A ) = A^H, op( B ) = B^H, and A and B are k by n if trans == HIPBLAS_OP_C
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_C: op( A ) = A^H, op( B ) = B^H HIPBLAS_OP_N: op( A ) = A, op( B ) = B |
| [in] | n | [int] n specifies the number of rows and columns of C. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | pointer storing matrix A on the GPU. Martrix dimension is ( lda, k ) when if trans = HIPBLAS_OP_N, otherwise (lda, n) only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of A. if trans = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | BP | pointer storing matrix B on the GPU. Martrix dimension is ( ldb, k ) when if trans = HIPBLAS_OP_N, otherwise (ldb, n) only the upper/lower triangular part is accessed. |
| [in] | ldb | [int] ldb specifies the first dimension of B. if trans = HIPBLAS_OP_N, ldb >= max( 1, n ), otherwise ldb >= max( 1, k ). |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | pointer storing matrix C on the GPU. The imaginary component of the diagonal elements are not used but are set to zero unless quick return. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| hipblasStatus_t hipblasZherkxBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex *const | AP[], | ||
| int | lda, | ||
| const hipblasDoubleComplex *const | BP[], | ||
| int | ldb, | ||
| const double * | beta, | ||
| hipblasDoubleComplex *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
herkxBatched performs a batch of the matrix-matrix operations for a Hermitian rank-k update
C_i := alpha*op( A_i )*op( B_i )^H + beta*C_i
where alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrices, and C_i is a n x n Hermitian matrix stored as either upper or lower. This routine should only be used when the caller can guarantee that the result of op( A )*op( B )^T will be Hermitian.
op( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == HIPBLAS_OP_N op( A_i ) = A_i^H, op( B_i ) = B_i^H, and A_i and B_i are k by n if trans == HIPBLAS_OP_C
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C_i is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_C: op(A) = A^H HIPBLAS_OP_N: op(A) = A |
| [in] | n | [int] n specifies the number of rows and columns of C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | device array of device pointers storing each matrix_i A of dimension (lda, k) when trans is HIPBLAS_OP_N, otherwise of dimension (lda, n) |
| [in] | lda | [int] lda specifies the first dimension of A_i. if trans = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | BP | device array of device pointers storing each matrix_i B of dimension (ldb, k) when trans is HIPBLAS_OP_N, otherwise of dimension (ldb, n) |
| [in] | ldb | [int] ldb specifies the first dimension of B_i. if trans = HIPBLAS_OP_N, ldb >= max( 1, n ), otherwise ldb >= max( 1, k ). |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | device array of device pointers storing each matrix C_i on the GPU. The imaginary component of the diagonal elements are not used but are set to zero unless quick return. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZherkxStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const hipblasDoubleComplex * | BP, | ||
| int | ldb, | ||
| hipblasStride | strideB, | ||
| const double * | beta, | ||
| hipblasDoubleComplex * | CP, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
herkxStridedBatched performs a batch of the matrix-matrix operations for a Hermitian rank-k update
C_i := alpha*op( A_i )*op( B_i )^H + beta*C_i
where alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrices, and C_i is a n x n Hermitian matrix stored as either upper or lower. This routine should only be used when the caller can guarantee that the result of op( A )*op( B )^T will be Hermitian.
op( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == HIPBLAS_OP_N op( A_i ) = A_i^H, op( B_i ) = B_i^H, and A_i and B_i are k by n if trans == HIPBLAS_OP_C
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C_i is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_C: op( A_i ) = A_i^H, op( B_i ) = B_i^H HIPBLAS_OP_N: op( A_i ) = A_i, op( B_i ) = B_i |
| [in] | n | [int] n specifies the number of rows and columns of C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | Device pointer to the first matrix A_1 on the GPU of dimension (lda, k) when trans is HIPBLAS_OP_N, otherwise of dimension (lda, n) |
| [in] | lda | [int] lda specifies the first dimension of A_i. if trans = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | BP | Device pointer to the first matrix B_1 on the GPU of dimension (ldb, k) when trans is HIPBLAS_OP_N, otherwise of dimension (ldb, n) |
| [in] | ldb | [int] ldb specifies the first dimension of B_i. if trans = HIPBLAS_OP_N, ldb >= max( 1, n ), otherwise ldb >= max( 1, k ). |
| [in] | strideB | [hipblasStride] stride from the start of one matrix (B_i) and the next one (B_i+1) |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | Device pointer to the first matrix C_1 on the GPU. The imaginary component of the diagonal elements are not used but are set to zero unless quick return. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| [in,out] | strideC | [hipblasStride] stride from the start of one matrix (C_i) and the next one (C_i+1) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZherStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const double * | alpha, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
herStridedBatched performs the matrix-vector operations
A_i := A_i + alpha*x_i*x_i**H
where alpha is a real scalar, x_i is a vector, and A_i is an n by n Hermitian matrix, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied in A. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied in A. |
| [in] | n | [int] the number of rows and columns of each matrix A_i, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer pointing to the first vector (x_1). |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). |
| [in,out] | AP | device array of device pointers storing the specified triangular portion of each Hermitian matrix A_i. Points to the first matrix (A_1). if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of each Hermitian matrix A_i is supplied. The lower triangular portion of each A_i will not be touched. if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of each Hermitian matrix A_i is supplied. The upper triangular portion of each A_i will not be touched. Note that the imaginary part of the diagonal elements are not accessed and are assumed to be 0. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | strideA | [hipblasStride] stride from the start of one (A_i) and the next (A_i+1) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZhpmv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| const hipblasDoubleComplex * | beta, | ||
| hipblasDoubleComplex * | y, | ||
| int | incy | ||
| ) |
BLAS Level 2 API.
hpmv performs the matrix-vector operation
y := alpha*A*x + beta*y
where alpha and beta are scalars, x and y are n element vectors and A is an n by n Hermitian matrix, supplied in packed form (see description below).
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: the upper triangular part of the Hermitian matrix A is supplied in AP. HIPBLAS_FILL_MODE_LOWER: the lower triangular part of the Hermitian matrix A is supplied in AP. |
| [in] | n | [int] the order of the matrix A, must be >= 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device pointer storing the packed version of the specified triangular portion of the Hermitian matrix A. Of at least size ((n * (n + 1)) / 2). if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of the Hermitian matrix A is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(0,1) AP(2) = A(1,1), etc. Ex: (HIPBLAS_FILL_MODE_UPPER; n = 3) (1, 0) (2, 1) (3, 2) (2,-1) (4, 0) (5,-1) --—> [(1,0), (2,1), (4,0), (3,2), (5,-1), (6,0)] (3,-2) (5, 1) (6, 0) if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of the Hermitian matrix A is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(1,0) AP(2) = A(2,1), etc. Ex: (HIPBLAS_FILL_MODE_LOWER; n = 3) (1, 0) (2, 1) (3, 2) (2,-1) (4, 0) (5,-1) --—> [(1,0), (2,-1), (3,-2), (4,0), (5,1), (6,0)] (3,-2) (5, 1) (6, 0) Note that the imaginary part of the diagonal elements are not accessed and are assumed to be 0. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| hipblasStatus_t hipblasZhpmvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex *const | AP[], | ||
| const hipblasDoubleComplex *const | x[], | ||
| int | incx, | ||
| const hipblasDoubleComplex * | beta, | ||
| hipblasDoubleComplex *const | y[], | ||
| int | incy, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
hpmvBatched performs the matrix-vector operation
y_i := alpha*A_i*x_i + beta*y_i
where alpha and beta are scalars, x_i and y_i are n element vectors and A_i is an n by n Hermitian matrix, supplied in packed form (see description below), for each batch in i = [1, batchCount].
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: the upper triangular part of each Hermitian matrix A_i is supplied in AP. HIPBLAS_FILL_MODE_LOWER: the lower triangular part of each Hermitian matrix A_i is supplied in AP. |
| [in] | n | [int] the order of each matrix A_i. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device pointer of device pointers storing the packed version of the specified triangular portion of each Hermitian matrix A_i. Each A_i is of at least size ((n * (n + 1)) / 2). if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of each Hermitian matrix A_i is supplied. The matrix is compacted so that each AP_i contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(0,1) AP(2) = A(1,1), etc. Ex: (HIPBLAS_FILL_MODE_UPPER; n = 3) (1, 0) (2, 1) (3, 2) (2,-1) (4, 0) (5,-1) --—> [(1,0), (2,1), (4,0), (3,2), (5,-1), (6,0)] (3,-2) (5, 1) (6, 0) if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of each Hermitian matrix A_i is supplied. The matrix is compacted so that each AP_i contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(1,0) AP(2) = A(2,1), etc. Ex: (HIPBLAS_FILL_MODE_LOWER; n = 3) (1, 0) (2, 1) (3, 2) (2,-1) (4, 0) (5,-1) --—> [(1,0), (2,-1), (3,-2), (4,0), (5,1), (6,0)] (3,-2) (5, 1) (6, 0) Note that the imaginary part of the diagonal elements are not accessed and are assumed to be 0. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZhpmvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| hipblasStride | strideA, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const hipblasDoubleComplex * | beta, | ||
| hipblasDoubleComplex * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
hpmvStridedBatched performs the matrix-vector operation
y_i := alpha*A_i*x_i + beta*y_i
where alpha and beta are scalars, x_i and y_i are n element vectors and A_i is an n by n Hermitian matrix, supplied in packed form (see description below), for each batch in i = [1, batchCount].
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: the upper triangular part of each Hermitian matrix A_i is supplied in AP. HIPBLAS_FILL_MODE_LOWER: the lower triangular part of each Hermitian matrix A_i is supplied in AP. |
| [in] | n | [int] the order of each matrix A_i. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | AP | device pointer pointing to the beginning of the first matrix (AP_1). Stores the packed version of the specified triangular portion of each Hermitian matrix AP_i of size ((n * (n + 1)) / 2). if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of each Hermitian matrix A_i is supplied. The matrix is compacted so that each AP_i contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(0,1) AP(2) = A(1,1), etc. Ex: (HIPBLAS_FILL_MODE_UPPER; n = 3) (1, 0) (2, 1) (3, 2) (2,-1) (4, 0) (5,-1) --—> [(1,0), (2,1), (4,0), (3,2), (5,-1), (6,0)] (3,-2) (5, 1) (6, 0) if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of each Hermitian matrix A_i is supplied. The matrix is compacted so that each AP_i contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(1,0) AP(2) = A(2,1), etc. Ex: (HIPBLAS_FILL_MODE_LOWER; n = 3) (1, 0) (2, 1) (3, 2) (2,-1) (4, 0) (5,-1) --—> [(1,0), (2,-1), (3,-2), (4,0), (5,1), (6,0)] (3,-2) (5, 1) (6, 0) Note that the imaginary part of the diagonal elements are not accessed and are assumed to be 0. |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (AP_i) and the next one (AP_i+1). |
| [in] | x | device array pointing to the beginning of the first vector (x_1). |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). |
| [in] | beta | device pointer or host pointer to scalar beta. |
| [in,out] | y | device array pointing to the beginning of the first vector (y_1). |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1). |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZhpr | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const double * | alpha, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasDoubleComplex * | AP | ||
| ) |
BLAS Level 2 API.
hpr performs the matrix-vector operations
A := A + alpha*x*x**H
where alpha is a real scalar, x is a vector, and A is an n by n Hermitian matrix, supplied in packed form.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of A is supplied in AP. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of A is supplied in AP. |
| [in] | n | [int] the number of rows and columns of matrix A, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in,out] | AP | device pointer storing the packed version of the specified triangular portion of the Hermitian matrix A. Of at least size ((n * (n + 1)) / 2). if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of the Hermitian matrix A is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(0,1) AP(2) = A(1,1), etc. Ex: (HIPBLAS_FILL_MODE_UPPER; n = 3) (1, 0) (2, 1) (4,9) (2,-1) (3, 0) (5,3) --—> [(1,0), (2,1), (3,0), (4,9), (5,3), (6,0)] (4,-9) (5,-3) (6,0) if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of the Hermitian matrix A is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(1,0) AP(2) = A(2,1), etc. Ex: (HIPBLAS_FILL_MODE_LOWER; n = 3) (1, 0) (2, 1) (4,9) (2,-1) (3, 0) (5,3) --—> [(1,0), (2,-1), (4,-9), (3,0), (5,-3), (6,0)] (4,-9) (5,-3) (6,0) Note that the imaginary part of the diagonal elements are not accessed and are assumed to be 0. |
| hipblasStatus_t hipblasZhpr2 | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| const hipblasDoubleComplex * | y, | ||
| int | incy, | ||
| hipblasDoubleComplex * | AP | ||
| ) |
BLAS Level 2 API.
hpr2 performs the matrix-vector operations
A := A + alpha*x*y**H + conj(alpha)*y*x**H
where alpha is a complex scalar, x and y are vectors, and A is an n by n Hermitian matrix, supplied in packed form.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of A is supplied in AP. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of A is supplied in AP. |
| [in] | n | [int] the number of rows and columns of matrix A, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in,out] | AP | device pointer storing the packed version of the specified triangular portion of the Hermitian matrix A. Of at least size ((n * (n + 1)) / 2). if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of the Hermitian matrix A is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(0,1) AP(2) = A(1,1), etc. Ex: (HIPBLAS_FILL_MODE_UPPER; n = 3) (1, 0) (2, 1) (4,9) (2,-1) (3, 0) (5,3) --—> [(1,0), (2,1), (3,0), (4,9), (5,3), (6,0)] (4,-9) (5,-3) (6,0) if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of the Hermitian matrix A is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(1,0) AP(2) = A(2,1), etc. Ex: (HIPBLAS_FILL_MODE_LOWER; n = 3) (1, 0) (2, 1) (4,9) (2,-1) (3, 0) (5,3) --—> [(1,0), (2,-1), (4,-9), (3,0), (5,-3), (6,0)] (4,-9) (5,-3) (6,0) Note that the imaginary part of the diagonal elements are not accessed and are assumed to be 0. |
| hipblasStatus_t hipblasZhpr2Batched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex *const | x[], | ||
| int | incx, | ||
| const hipblasDoubleComplex *const | y[], | ||
| int | incy, | ||
| hipblasDoubleComplex *const | AP[], | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
hpr2Batched performs the matrix-vector operations
A_i := A_i + alpha*x_i*y_i**H + conj(alpha)*y_i*x_i**H
where alpha is a complex scalar, x_i and y_i are vectors, and A_i is an n by n symmetric matrix, supplied in packed form, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied in AP. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied in AP. |
| [in] | n | [int] the number of rows and columns of each matrix A_i, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in,out] | AP | device array of device pointers storing the packed version of the specified triangular portion of each Hermitian matrix A_i of at least size ((n * (n + 1)) / 2). Array is of at least size batchCount. if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of each Hermitian matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(0,1) AP(2) = A(1,1), etc. Ex: (HIPBLAS_FILL_MODE_UPPER; n = 3) (1, 0) (2, 1) (4,9) (2,-1) (3, 0) (5,3) --—> [(1,0), (2,1), (3,0), (4,9), (5,3), (6,0)] (4,-9) (5,-3) (6,0) if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of each Hermitian matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(1,0) AP(2) = A(2,1), etc. Ex: (HIPBLAS_FILL_MODE_LOWER; n = 3) (1, 0) (2, 1) (4,9) (2,-1) (3, 0) (5,3) --—> [(1,0), (2,-1), (4,-9), (3,0), (5,-3), (6,0)] (4,-9) (5,-3) (6,0) Note that the imaginary part of the diagonal elements are not accessed and are assumed to be 0. |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZhpr2StridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const hipblasDoubleComplex * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| hipblasDoubleComplex * | AP, | ||
| hipblasStride | strideA, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
hpr2StridedBatched performs the matrix-vector operations
A_i := A_i + alpha*x_i*y_i**H + conj(alpha)*y_i*x_i**H
where alpha is a complex scalar, x_i and y_i are vectors, and A_i is an n by n symmetric matrix, supplied in packed form, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied in AP. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied in AP. |
| [in] | n | [int] the number of rows and columns of each matrix A_i, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer pointing to the first vector (x_1). |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). |
| [in] | y | device pointer pointing to the first vector (y_1). |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1). |
| [in,out] | AP | device array of device pointers storing the packed version of the specified triangular portion of each Hermitian matrix A_i. Points to the first matrix (A_1). if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of each Hermitian matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(0,1) AP(2) = A(1,1), etc. Ex: (HIPBLAS_FILL_MODE_UPPER; n = 3) (1, 0) (2, 1) (4,9) (2,-1) (3, 0) (5,3) --—> [(1,0), (2,1), (3,0), (4,9), (5,3), (6,0)] (4,-9) (5,-3) (6,0) if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of each Hermitian matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(1,0) AP(2) = A(2,1), etc. Ex: (HIPBLAS_FILL_MODE_LOWER; n = 3) (1, 0) (2, 1) (4,9) (2,-1) (3, 0) (5,3) --—> [(1,0), (2,-1), (4,-9), (3,0), (5,-3), (6,0)] (4,-9) (5,-3) (6,0) Note that the imaginary part of the diagonal elements are not accessed and are assumed to be 0. |
| [in] | strideA | [hipblasStride] stride from the start of one (A_i) and the next (A_i+1) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZhprBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const double * | alpha, | ||
| const hipblasDoubleComplex *const | x[], | ||
| int | incx, | ||
| hipblasDoubleComplex *const | AP[], | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
hprBatched performs the matrix-vector operations
A_i := A_i + alpha*x_i*x_i**H
where alpha is a real scalar, x_i is a vector, and A_i is an n by n symmetric matrix, supplied in packed form, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied in AP. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied in AP. |
| [in] | n | [int] the number of rows and columns of each matrix A_i, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in,out] | AP | device array of device pointers storing the packed version of the specified triangular portion of each Hermitian matrix A_i of at least size ((n * (n + 1)) / 2). Array is of at least size batchCount. if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of each Hermitian matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(0,1) AP(2) = A(1,1), etc. Ex: (HIPBLAS_FILL_MODE_UPPER; n = 3) (1, 0) (2, 1) (4,9) (2,-1) (3, 0) (5,3) --—> [(1,0), (2,1), (3,0), (4,9), (5,3), (6,0)] (4,-9) (5,-3) (6,0) if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of each Hermitian matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(1,0) AP(2) = A(2,1), etc. Ex: (HIPBLAS_FILL_MODE_LOWER; n = 3) (1, 0) (2, 1) (4,9) (2,-1) (3, 0) (5,3) --—> [(1,0), (2,-1), (4,-9), (3,0), (5,-3), (6,0)] (4,-9) (5,-3) (6,0) Note that the imaginary part of the diagonal elements are not accessed and are assumed to be 0. |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZhprStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const double * | alpha, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| hipblasDoubleComplex * | AP, | ||
| hipblasStride | strideA, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
hprStridedBatched performs the matrix-vector operations
A_i := A_i + alpha*x_i*x_i**H
where alpha is a real scalar, x_i is a vector, and A_i is an n by n symmetric matrix, supplied in packed form, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied in AP. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied in AP. |
| [in] | n | [int] the number of rows and columns of each matrix A_i, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer pointing to the first vector (x_1). |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). |
| [in,out] | AP | device array of device pointers storing the packed version of the specified triangular portion of each Hermitian matrix A_i. Points to the first matrix (A_1). if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of each Hermitian matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(0,1) AP(2) = A(1,1), etc. Ex: (HIPBLAS_FILL_MODE_UPPER; n = 3) (1, 0) (2, 1) (4,9) (2,-1) (3, 0) (5,3) --—> [(1,0), (2,1), (3,0), (4,9), (5,3), (6,0)] (4,-9) (5,-3) (6,0) if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of each Hermitian matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(1,0) AP(2) = A(2,1), etc. Ex: (HIPBLAS_FILL_MODE_LOWER; n = 3) (1, 0) (2, 1) (4,9) (2,-1) (3, 0) (5,3) --—> [(1,0), (2,-1), (4,-9), (3,0), (5,-3), (6,0)] (4,-9) (5,-3) (6,0) Note that the imaginary part of the diagonal elements are not accessed and are assumed to be 0. |
| [in] | strideA | [hipblasStride] stride from the start of one (A_i) and the next (A_i+1) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZrot | ( | hipblasHandle_t | handle, |
| int | n, | ||
| hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasDoubleComplex * | y, | ||
| int | incy, | ||
| const double * | c, | ||
| const hipblasDoubleComplex * | s | ||
| ) |
BLAS Level 1 API.
rot applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to vectors x and y. Scalars c and s may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in the x and y vectors. |
| [in,out] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment between elements of x. |
| [in,out] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment between elements of y. |
| [in] | c | device pointer or host pointer storing scalar cosine component of the rotation matrix. |
| [in] | s | device pointer or host pointer storing scalar sine component of the rotation matrix. |
| hipblasStatus_t hipblasZrotBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| hipblasDoubleComplex *const | x[], | ||
| int | incx, | ||
| hipblasDoubleComplex *const | y[], | ||
| int | incy, | ||
| const double * | c, | ||
| const hipblasDoubleComplex * | s, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
rotBatched applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to batched vectors x_i and y_i, for i = 1, ..., batchCount. Scalars c and s may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each x_i and y_i vectors. |
| [in,out] | x | device array of deivce pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment between elements of each x_i. |
| [in,out] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment between elements of each y_i. |
| [in] | c | device pointer or host pointer to scalar cosine component of the rotation matrix. |
| [in] | s | device pointer or host pointer to scalar sine component of the rotation matrix. |
| [in] | batchCount | [int] the number of x and y arrays, i.e. the number of batches. |
| hipblasStatus_t hipblasZrotg | ( | hipblasHandle_t | handle, |
| hipblasDoubleComplex * | a, | ||
| hipblasDoubleComplex * | b, | ||
| double * | c, | ||
| hipblasDoubleComplex * | s | ||
| ) |
BLAS Level 1 API.
rotg creates the Givens rotation matrix for the vector (a b). Scalars c and s and arrays a and b may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode. If the pointer mode is set to HIPBLAS_POINTER_MODE_HOST, this function blocks the CPU until the GPU has finished and the results are available in host memory. If the pointer mode is set to HIPBLAS_POINTER_MODE_DEVICE, this function returns immediately and synchronization is required to read the results.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in,out] | a | device pointer or host pointer to input vector element, overwritten with r. |
| [in,out] | b | device pointer or host pointer to input vector element, overwritten with z. |
| [in,out] | c | device pointer or host pointer to cosine element of Givens rotation. |
| [in,out] | s | device pointer or host pointer sine element of Givens rotation. |
| hipblasStatus_t hipblasZrotgBatched | ( | hipblasHandle_t | handle, |
| hipblasDoubleComplex *const | a[], | ||
| hipblasDoubleComplex *const | b[], | ||
| double *const | c[], | ||
| hipblasDoubleComplex *const | s[], | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
rotgBatched creates the Givens rotation matrix for the batched vectors (a_i b_i), for i = 1, ..., batchCount. a, b, c, and s may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode. If the pointer mode is set to HIPBLAS_POINTER_MODE_HOST, this function blocks the CPU until the GPU has finished and the results are available in host memory. If the pointer mode is set to HIPBLAS_POINTER_MODE_DEVICE, this function returns immediately and synchronization is required to read the results.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in,out] | a | device array of device pointers storing each single input vector element a_i, overwritten with r_i. |
| [in,out] | b | device array of device pointers storing each single input vector element b_i, overwritten with z_i. |
| [in,out] | c | device array of device pointers storing each cosine element of Givens rotation for the batch. |
| [in,out] | s | device array of device pointers storing each sine element of Givens rotation for the batch. |
| [in] | batchCount | [int] number of batches (length of arrays a, b, c, and s). |
| hipblasStatus_t hipblasZrotgStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasDoubleComplex * | a, | ||
| hipblasStride | stridea, | ||
| hipblasDoubleComplex * | b, | ||
| hipblasStride | strideb, | ||
| double * | c, | ||
| hipblasStride | stridec, | ||
| hipblasDoubleComplex * | s, | ||
| hipblasStride | strides, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
rotgStridedBatched creates the Givens rotation matrix for the strided batched vectors (a_i b_i), for i = 1, ..., batchCount. a, b, c, and s may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode. If the pointer mode is set to HIPBLAS_POINTER_MODE_HOST, this function blocks the CPU until the GPU has finished and the results are available in host memory. If the pointer mode is set to HIPBLAS_POINTER_MODE_HOST, this function returns immediately and synchronization is required to read the results.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in,out] | a | device strided_batched pointer or host strided_batched pointer to first single input vector element a_1, overwritten with r. |
| [in] | stridea | [hipblasStride] distance between elements of a in batch (distance between a_i and a_(i + 1)) |
| [in,out] | b | device strided_batched pointer or host strided_batched pointer to first single input vector element b_1, overwritten with z. |
| [in] | strideb | [hipblasStride] distance between elements of b in batch (distance between b_i and b_(i + 1)) |
| [in,out] | c | device strided_batched pointer or host strided_batched pointer to first cosine element of Givens rotations c_1. |
| [in] | stridec | [hipblasStride] distance between elements of c in batch (distance between c_i and c_(i + 1)) |
| [in,out] | s | device strided_batched pointer or host strided_batched pointer to sine element of Givens rotations s_1. |
| [in] | strides | [hipblasStride] distance between elements of s in batch (distance between s_i and s_(i + 1)) |
| [in] | batchCount | [int] number of batches (length of arrays a, b, c, and s). |
| hipblasStatus_t hipblasZrotStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| hipblasDoubleComplex * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| const double * | c, | ||
| const hipblasDoubleComplex * | s, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
rotStridedBatched applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to strided batched vectors x_i and y_i, for i = 1, ..., batchCount. Scalars c and s may be stored in either host or device memory, location is specified by calling hipblasSetPointerMode.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] number of elements in each x_i and y_i vectors. |
| [in,out] | x | device pointer to the first vector x_1. |
| [in] | incx | [int] specifies the increment between elements of each x_i. |
| [in] | stridex | [hipblasStride] specifies the increment from the beginning of x_i to the beginning of x_(i+1) |
| [in,out] | y | device pointer to the first vector y_1. |
| [in] | incy | [int] specifies the increment between elements of each y_i. |
| [in] | stridey | [hipblasStride] specifies the increment from the beginning of y_i to the beginning of y_(i+1) |
| [in] | c | device pointer or host pointer to scalar cosine component of the rotation matrix. |
| [in] | s | device pointer or host pointer to scalar sine component of the rotation matrix. |
| [in] | batchCount | [int] the number of x and y arrays, i.e. the number of batches. |
| hipblasStatus_t hipblasZscal | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| hipblasDoubleComplex * | x, | ||
| int | incx | ||
| ) |
BLAS Level 1 API.
scal scales each element of vector x with scalar alpha.
x := alpha * x
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x. |
| [in] | alpha | device pointer or host pointer for the scalar alpha. |
| [in,out] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| hipblasStatus_t hipblasZscalBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| hipblasDoubleComplex *const | x[], | ||
| int | incx, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
scalBatched scales each element of vector x_i with scalar alpha, for i = 1, ... , batchCount.
x_i := alpha * x_i
where (x_i) is the i-th instance of the batch.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i. |
| [in] | alpha | host pointer or device pointer for the scalar alpha. |
| [in,out] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | batchCount | [int] specifies the number of batches in x. |
| hipblasStatus_t hipblasZscalStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
scalStridedBatched scales each element of vector x_i with scalar alpha, for i = 1, ... , batchCount.
x_i := alpha * x_i ,
where (x_i) is the i-th instance of the batch.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i. |
| [in] | alpha | host pointer or device pointer for the scalar alpha. |
| [in,out] | x | device pointer to the first vector (x_1) in the batch. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stride_x, however the user should take care to ensure that stride_x is of appropriate size, for a typical case this means stride_x >= n * incx. |
| [in] | batchCount | [int] specifies the number of batches in x. |
| hipblasStatus_t hipblasZspr | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasDoubleComplex * | AP | ||
| ) |
BLAS Level 2 API.
spr performs the matrix-vector operations
A := A + alpha*x*x**T
where alpha is a scalar, x is a vector, and A is an n by n symmetric matrix, supplied in packed form.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of A is supplied in AP. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of A is supplied in AP. |
| [in] | n | [int] the number of rows and columns of matrix A, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in,out] | AP | device pointer storing the packed version of the specified triangular portion of the symmetric matrix A. Of at least size ((n * (n + 1)) / 2). if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of the symmetric matrix A is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(0,1) AP(2) = A(1,1), etc. Ex: (HIPBLAS_FILL_MODE_UPPER; n = 4) 1 2 4 7 2 3 5 8 --—> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] 4 5 6 9 7 8 9 0 if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of the symmetric matrix A is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(1,0) AP(2) = A(2,1), etc. Ex: (HIPBLAS_FILL_MODE_LOWER; n = 4) 1 2 3 4 2 5 6 7 --—> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] 3 6 8 9 4 7 9 0 |
| hipblasStatus_t hipblasZsprBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex *const | x[], | ||
| int | incx, | ||
| hipblasDoubleComplex *const | AP[], | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
sprBatched performs the matrix-vector operations
A_i := A_i + alpha*x_i*x_i**T
where alpha is a scalar, x_i is a vector, and A_i is an n by n symmetric matrix, supplied in packed form, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied in AP. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied in AP. |
| [in] | n | [int] the number of rows and columns of each matrix A_i, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in,out] | AP | device array of device pointers storing the packed version of the specified triangular portion of each symmetric matrix A_i of at least size ((n * (n + 1)) / 2). Array is of at least size batchCount. if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of each symmetric matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(0,1) AP(2) = A(1,1), etc. Ex: (HIPBLAS_FILL_MODE_UPPER; n = 4) 1 2 4 7 2 3 5 8 --—> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] 4 5 6 9 7 8 9 0 if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of each symmetric matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(1,0) AP(2) = A(2,1), etc. Ex: (HIPBLAS_FILL_MODE_LOWER; n = 4) 1 2 3 4 2 5 6 7 --—> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] 3 6 8 9 4 7 9 0 |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZsprStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| hipblasDoubleComplex * | AP, | ||
| hipblasStride | strideA, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
sprStridedBatched performs the matrix-vector operations
A_i := A_i + alpha*x_i*x_i**T
where alpha is a scalar, x_i is a vector, and A_i is an n by n symmetric matrix, supplied in packed form, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied in AP. HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied in AP. |
| [in] | n | [int] the number of rows and columns of each matrix A_i, must be at least 0. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer pointing to the first vector (x_1). |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). |
| [in,out] | AP | device pointer storing the packed version of the specified triangular portion of each symmetric matrix A_i. Points to the first A_1. if uplo == HIPBLAS_FILL_MODE_UPPER: The upper triangular portion of each symmetric matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(0,1) AP(2) = A(1,1), etc. Ex: (HIPBLAS_FILL_MODE_UPPER; n = 4) 1 2 4 7 2 3 5 8 --—> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] 4 5 6 9 7 8 9 0 if uplo == HIPBLAS_FILL_MODE_LOWER: The lower triangular portion of each symmetric matrix A_i is supplied. The matrix is compacted so that AP contains the triangular portion column-by-column so that: AP(0) = A(0,0) AP(1) = A(1,0) AP(2) = A(2,1), etc. Ex: (HIPBLAS_FILL_MODE_LOWER; n = 4) 1 2 3 4 2 5 6 7 --—> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] 3 6 8 9 4 7 9 0 |
| [in] | strideA | [hipblasStride] stride from the start of one (A_i) and the next (A_i+1) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZswap | ( | hipblasHandle_t | handle, |
| int | n, | ||
| hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasDoubleComplex * | y, | ||
| int | incy | ||
| ) |
BLAS Level 1 API.
swap interchanges vectors x and y.
y := x; x := y
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in x and y. |
| [in,out] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in,out] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| hipblasStatus_t hipblasZswapBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| hipblasDoubleComplex *const | x[], | ||
| int | incx, | ||
| hipblasDoubleComplex *const | y[], | ||
| int | incy, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
swapBatched interchanges vectors x_i and y_i, for i = 1 , ... , batchCount
y_i := x_i; x_i := y_i
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i and y_i. |
| [in,out] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in,out] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZswapStridedBatched | ( | hipblasHandle_t | handle, |
| int | n, | ||
| hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| hipblasDoubleComplex * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount | ||
| ) |
BLAS Level 1 API.
swapStridedBatched interchanges vectors x_i and y_i, for i = 1 , ... , batchCount
y_i := x_i; x_i := y_i
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | n | [int] the number of elements in each x_i and y_i. |
| [in,out] | x | device pointer to the first vector x_1. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stride_x, however the user should take care to ensure that stride_x is of appropriate size, for a typical case this means stride_x >= n * incx. |
| [in,out] | y | device pointer to the first vector y_1. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1). There are no restrictions placed on stride_x, however the user should take care to ensure that stride_y is of appropriate size, for a typical case this means stride_y >= n * incy. stridey should be non zero. |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZsymm | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| int | m, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| const hipblasDoubleComplex * | BP, | ||
| int | ldb, | ||
| const hipblasDoubleComplex * | beta, | ||
| hipblasDoubleComplex * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
symm performs one of the matrix-matrix operations:
C := alpha*A*B + beta*C if side == HIPBLAS_SIDE_LEFT, C := alpha*B*A + beta*C if side == HIPBLAS_SIDE_RIGHT,
where alpha and beta are scalars, B and C are m by n matrices, and A is a symmetric matrix stored as either upper or lower.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] HIPBLAS_SIDE_LEFT: C := alpha*A*B + beta*C HIPBLAS_SIDE_RIGHT: C := alpha*B*A + beta*C |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix |
| [in] | m | [int] m specifies the number of rows of B and C. m >= 0. |
| [in] | n | [int] n specifies the number of columns of B and C. n >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A and B are not referenced. |
| [in] | AP | pointer storing matrix A on the GPU. A is m by m if side == HIPBLAS_SIDE_LEFT A is n by n if side == HIPBLAS_SIDE_RIGHT only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of A. if side = HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), otherwise lda >= max( 1, n ). |
| [in] | BP | pointer storing matrix B on the GPU. Matrix dimension is m by n |
| [in] | ldb | [int] ldb specifies the first dimension of B. ldb >= max( 1, m ) |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | pointer storing matrix C on the GPU. Matrix dimension is m by n |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, m ) |
| hipblasStatus_t hipblasZsymmBatched | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| int | m, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex *const | AP[], | ||
| int | lda, | ||
| const hipblasDoubleComplex *const | BP[], | ||
| int | ldb, | ||
| const hipblasDoubleComplex * | beta, | ||
| hipblasDoubleComplex *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
symmBatched performs a batch of the matrix-matrix operations:
C_i := alpha*A_i*B_i + beta*C_i if side == HIPBLAS_SIDE_LEFT, C_i := alpha*B_i*A_i + beta*C_i if side == HIPBLAS_SIDE_RIGHT,
where alpha and beta are scalars, B_i and C_i are m by n matrices, and A_i is a symmetric matrix stored as either upper or lower.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] HIPBLAS_SIDE_LEFT: C_i := alpha*A_i*B_i + beta*C_i HIPBLAS_SIDE_RIGHT: C_i := alpha*B_i*A_i + beta*C_i |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: A_i is a lower triangular matrix |
| [in] | m | [int] m specifies the number of rows of B_i and C_i. m >= 0. |
| [in] | n | [int] n specifies the number of columns of B_i and C_i. n >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A_i and B_i are not referenced. |
| [in] | AP | device array of device pointers storing each matrix A_i on the GPU. A_i is m by m if side == HIPBLAS_SIDE_LEFT A_i is n by n if side == HIPBLAS_SIDE_RIGHT only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of A_i. if side = HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), otherwise lda >= max( 1, n ). |
| [in] | BP | device array of device pointers storing each matrix B_i on the GPU. Matrix dimension is m by n |
| [in] | ldb | [int] ldb specifies the first dimension of B_i. ldb >= max( 1, m ) |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C_i need not be set before entry. |
| [in] | CP | device array of device pointers storing each matrix C_i on the GPU. Matrix dimension is m by n |
| [in] | ldc | [int] ldc specifies the first dimension of C_i. ldc >= max( 1, m ) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZsymmStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| int | m, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const hipblasDoubleComplex * | BP, | ||
| int | ldb, | ||
| hipblasStride | strideB, | ||
| const hipblasDoubleComplex * | beta, | ||
| hipblasDoubleComplex * | CP, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
symmStridedBatched performs a batch of the matrix-matrix operations:
C_i := alpha*A_i*B_i + beta*C_i if side == HIPBLAS_SIDE_LEFT, C_i := alpha*B_i*A_i + beta*C_i if side == HIPBLAS_SIDE_RIGHT,
where alpha and beta are scalars, B_i and C_i are m by n matrices, and A_i is a symmetric matrix stored as either upper or lower.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] HIPBLAS_SIDE_LEFT: C_i := alpha*A_i*B_i + beta*C_i HIPBLAS_SIDE_RIGHT: C_i := alpha*B_i*A_i + beta*C_i |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: A_i is a lower triangular matrix |
| [in] | m | [int] m specifies the number of rows of B_i and C_i. m >= 0. |
| [in] | n | [int] n specifies the number of columns of B_i and C_i. n >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A_i and B_i are not referenced. |
| [in] | AP | device pointer to first matrix A_1 A_i is m by m if side == HIPBLAS_SIDE_LEFT A_i is n by n if side == HIPBLAS_SIDE_RIGHT only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of A_i. if side = HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), otherwise lda >= max( 1, n ). |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | BP | device pointer to first matrix B_1 of dimension (ldb, n) on the GPU. |
| [in] | ldb | [int] ldb specifies the first dimension of B_i. ldb >= max( 1, m ) |
| [in] | strideB | [hipblasStride] stride from the start of one matrix (B_i) and the next one (B_i+1) |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | device pointer to first matrix C_1 of dimension (ldc, n) on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, m ). |
| [in,out] | strideC | [hipblasStride] stride from the start of one matrix (C_i) and the next one (C_i+1) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZsymv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| const hipblasDoubleComplex * | beta, | ||
| hipblasDoubleComplex * | y, | ||
| int | incy | ||
| ) |
BLAS Level 2 API.
symv performs the matrix-vector operation:
y := alpha*A*x + beta*y,
where alpha and beta are scalars, x and y are n element vectors and A should contain an upper or lower triangular n by n symmetric matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] |
| [in] | alpha | specifies the scalar alpha |
| [in] | AP | pointer storing matrix A on the GPU |
| [in] | lda | [int] specifies the leading dimension of A |
| [in] | x | pointer storing vector x on the GPU |
| [in] | incx | [int] specifies the increment for the elements of x |
| [in] | beta | specifies the scalar beta |
| [out] | y | pointer storing vector y on the GPU |
| [in] | incy | [int] specifies the increment for the elements of y |
| hipblasStatus_t hipblasZsymvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex *const | AP[], | ||
| int | lda, | ||
| const hipblasDoubleComplex *const | x[], | ||
| int | incx, | ||
| const hipblasDoubleComplex * | beta, | ||
| hipblasDoubleComplex *const | y[], | ||
| int | incy, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
symvBatched performs the matrix-vector operation:
y_i := alpha*A_i*x_i + beta*y_i,
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha and beta are scalars, x_i and y_i are vectors and A_i is an n by n symmetric matrix, for i = 1, ..., batchCount. A a should contain an upper or lower triangular symmetric matrix and the opposing triangular part of A is not referenced
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] number of rows and columns of each matrix A_i |
| [in] | alpha | device pointer or host pointer to scalar alpha |
| [in] | AP | device array of device pointers storing each matrix A_i |
| [in] | lda | [int] specifies the leading dimension of each matrix A_i |
| [in] | x | device array of device pointers storing each vector x_i |
| [in] | incx | [int] specifies the increment for the elements of each vector x_i |
| [in] | beta | device pointer or host pointer to scalar beta |
| [out] | y | device array of device pointers storing each vector y_i |
| [in] | incy | [int] specifies the increment for the elements of each vector y_i |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasZsymvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const hipblasDoubleComplex * | beta, | ||
| hipblasDoubleComplex * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
symvStridedBatched performs the matrix-vector operation:
y_i := alpha*A_i*x_i + beta*y_i,
where (A_i, x_i, y_i) is the i-th instance of the batch. alpha and beta are scalars, x_i and y_i are vectors and A_i is an n by n symmetric matrix, for i = 1, ..., batchCount. A a should contain an upper or lower triangular symmetric matrix and the opposing triangular part of A is not referenced
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] number of rows and columns of each matrix A_i |
| [in] | alpha | device pointer or host pointer to scalar alpha |
| [in] | AP | Device pointer to the first matrix A_1 on the GPU |
| [in] | lda | [int] specifies the leading dimension of each matrix A_i |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | x | Device pointer to the first vector x_1 on the GPU |
| [in] | incx | [int] specifies the increment for the elements of each vector x_i |
| [in] | stridex | [hipblasStride] stride from the start of one vector (x_i) and the next one (x_i+1). There are no restrictions placed on stridex, however the user should take care to ensure that stridex is of appropriate size. This typically means stridex >= n * incx. stridex should be non zero. |
| [in] | beta | device pointer or host pointer to scalar beta |
| [out] | y | Device pointer to the first vector y_1 on the GPU |
| [in] | incy | [int] specifies the increment for the elements of each vector y_i |
| [in] | stridey | [hipblasStride] stride from the start of one vector (y_i) and the next one (y_i+1). There are no restrictions placed on stridey, however the user should take care to ensure that stridey is of appropriate size. This typically means stridey >= n * incy. stridey should be non zero. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasZsyr | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasDoubleComplex * | AP, | ||
| int | lda | ||
| ) |
BLAS Level 2 API.
syr performs the matrix-vector operations
A := A + alpha*x*x**T
where alpha is a scalar, x is a vector, and A is an n by n symmetric matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] the number of rows and columns of matrix A. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in,out] | AP | device pointer storing matrix A. |
| [in] | lda | [int] specifies the leading dimension of A. |
| hipblasStatus_t hipblasZsyr2 | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| const hipblasDoubleComplex * | y, | ||
| int | incy, | ||
| hipblasDoubleComplex * | AP, | ||
| int | lda | ||
| ) |
BLAS Level 2 API.
syr2 performs the matrix-vector operations
A := A + alpha*x*y**T + alpha*y*x**T
where alpha is a scalar, x and y are vectors, and A is an n by n symmetric matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] the number of rows and columns of matrix A. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | y | device pointer storing vector y. |
| [in] | incy | [int] specifies the increment for the elements of y. |
| [in,out] | AP | device pointer storing matrix A. |
| [in] | lda | [int] specifies the leading dimension of A. |
| hipblasStatus_t hipblasZsyr2Batched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex *const | x[], | ||
| int | incx, | ||
| const hipblasDoubleComplex *const | y[], | ||
| int | incy, | ||
| hipblasDoubleComplex *const | AP[], | ||
| int | lda, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
syr2Batched performs a batch of matrix-vector operations
A[i] := A[i] + alpha*x[i]*y[i]**T + alpha*y[i]*x[i]**T
where alpha is a scalar, x[i] and y[i] are vectors, and A[i] is a n by n symmetric matrix, for i = 1 , ... , batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] the number of rows and columns of matrix A. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | y | device array of device pointers storing each vector y_i. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in,out] | AP | device array of device pointers storing each matrix A_i. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasZsyr2k | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| const hipblasDoubleComplex * | BP, | ||
| int | ldb, | ||
| const hipblasDoubleComplex * | beta, | ||
| hipblasDoubleComplex * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
syr2k performs one of the matrix-matrix operations for a symmetric rank-2k update
C := alpha*(op( A )*op( B )^T + op( B )*op( A )^T) + beta*C
where alpha and beta are scalars, op(A) and op(B) are n by k matrix, and C is a symmetric n x n matrix stored as either upper or lower.
op( A ) = A, op( B ) = B, and A and B are n by k if trans == HIPBLAS_OP_N op( A ) = A^T, op( B ) = B^T, and A and B are k by n if trans == HIPBLAS_OP_T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_T: op( A ) = A^T, op( B ) = B^T HIPBLAS_OP_N: op( A ) = A, op( B ) = B |
| [in] | n | [int] n specifies the number of rows and columns of C. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A) and op(B). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | pointer storing matrix A on the GPU. Martrix dimension is ( lda, k ) when if trans = HIPBLAS_OP_N, otherwise (lda, n) only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of A. if trans = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | BP | pointer storing matrix B on the GPU. Martrix dimension is ( ldb, k ) when if trans = HIPBLAS_OP_N, otherwise (ldb, n) only the upper/lower triangular part is accessed. |
| [in] | ldb | [int] ldb specifies the first dimension of B. if trans = HIPBLAS_OP_N, ldb >= max( 1, n ), otherwise ldb >= max( 1, k ). |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | pointer storing matrix C on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| hipblasStatus_t hipblasZsyr2kBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex *const | AP[], | ||
| int | lda, | ||
| const hipblasDoubleComplex *const | BP[], | ||
| int | ldb, | ||
| const hipblasDoubleComplex * | beta, | ||
| hipblasDoubleComplex *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
syr2kBatched performs a batch of the matrix-matrix operations for a symmetric rank-2k update
C_i := alpha*(op( A_i )*op( B_i )^T + op( B_i )*op( A_i )^T) + beta*C_i
where alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrix, and C_i is a symmetric n x n matrix stored as either upper or lower.
op( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == HIPBLAS_OP_N op( A_i ) = A_i^T, op( B_i ) = B_i^T, and A_i and B_i are k by n if trans == HIPBLAS_OP_T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C_i is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_T: op( A_i ) = A_i^T, op( B_i ) = B_i^T HIPBLAS_OP_N: op( A_i ) = A_i, op( B_i ) = B_i |
| [in] | n | [int] n specifies the number of rows and columns of C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | device array of device pointers storing each matrix_i A of dimension (lda, k) when trans is HIPBLAS_OP_N, otherwise of dimension (lda, n) |
| [in] | lda | [int] lda specifies the first dimension of A_i. if trans = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | BP | device array of device pointers storing each matrix_i B of dimension (ldb, k) when trans is HIPBLAS_OP_N, otherwise of dimension (ldb, n) |
| [in] | ldb | [int] ldb specifies the first dimension of B. if trans = HIPBLAS_OP_N, ldb >= max( 1, n ), otherwise ldb >= max( 1, k ). |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | device array of device pointers storing each matrix C_i on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZsyr2kStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const hipblasDoubleComplex * | BP, | ||
| int | ldb, | ||
| hipblasStride | strideB, | ||
| const hipblasDoubleComplex * | beta, | ||
| hipblasDoubleComplex * | CP, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
syr2kStridedBatched performs a batch of the matrix-matrix operations for a symmetric rank-2k update
C_i := alpha*(op( A_i )*op( B_i )^T + op( B_i )*op( A_i )^T) + beta*C_i
where alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrix, and C_i is a symmetric n x n matrix stored as either upper or lower.
op( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == HIPBLAS_OP_N op( A_i ) = A_i^T, op( B_i ) = B_i^T, and A_i and B_i are k by n if trans == HIPBLAS_OP_T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C_i is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_T: op( A_i ) = A_i^T, op( B_i ) = B_i^T HIPBLAS_OP_N: op( A_i ) = A_i, op( B_i ) = B_i |
| [in] | n | [int] n specifies the number of rows and columns of C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | Device pointer to the first matrix A_1 on the GPU of dimension (lda, k) when trans is HIPBLAS_OP_N, otherwise of dimension (lda, n) |
| [in] | lda | [int] lda specifies the first dimension of A_i. if trans = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | BP | Device pointer to the first matrix B_1 on the GPU of dimension (ldb, k) when trans is HIPBLAS_OP_N, otherwise of dimension (ldb, n) |
| [in] | ldb | [int] ldb specifies the first dimension of B_i. if trans = HIPBLAS_OP_N, ldb >= max( 1, n ), otherwise ldb >= max( 1, k ). |
| [in] | strideB | [hipblasStride] stride from the start of one matrix (B_i) and the next one (B_i+1) |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | Device pointer to the first matrix C_1 on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| [in,out] | strideC | [hipblasStride] stride from the start of one matrix (C_i) and the next one (C_i+1) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZsyr2StridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| const hipblasDoubleComplex * | y, | ||
| int | incy, | ||
| hipblasStride | stridey, | ||
| hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
syr2StridedBatched the matrix-vector operations
A[i] := A[i] + alpha*x[i]*y[i]**T + alpha*y[i]*x[i]**T
where alpha is a scalar, x[i] and y[i] are vectors, and A[i] is a n by n symmetric matrices, for i = 1 , ... , batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] the number of rows and columns of each matrix A. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer to the first vector x_1. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] specifies the pointer increment between vectors (x_i) and (x_i+1). |
| [in] | y | device pointer to the first vector y_1. |
| [in] | incy | [int] specifies the increment for the elements of each y_i. |
| [in] | stridey | [hipblasStride] specifies the pointer increment between vectors (y_i) and (y_i+1). |
| [in,out] | AP | device pointer to the first matrix A_1. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasZsyrBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex *const | x[], | ||
| int | incx, | ||
| hipblasDoubleComplex *const | AP[], | ||
| int | lda, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
syrBatched performs a batch of matrix-vector operations
A[i] := A[i] + alpha*x[i]*x[i]**T
where alpha is a scalar, x is an array of vectors, and A is an array of n by n symmetric matrices, for i = 1 , ... , batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] the number of rows and columns of matrix A. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in,out] | AP | device array of device pointers storing each matrix A_i. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasZsyrk | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| const hipblasDoubleComplex * | beta, | ||
| hipblasDoubleComplex * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
syrk performs one of the matrix-matrix operations for a symmetric rank-k update
C := alpha*op( A )*op( A )^T + beta*C
where alpha and beta are scalars, op(A) is an n by k matrix, and C is a symmetric n x n matrix stored as either upper or lower.
op( A ) = A, and A is n by k if transA == HIPBLAS_OP_N op( A ) = A^T and A is k by n if transA == HIPBLAS_OP_T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_T: op(A) = A^T HIPBLAS_OP_N: op(A) = A HIPBLAS_OP_C: op(A) = A^T |
HIPBLAS_OP_C is not supported for complex types, see cherk and zherk.
| [in] | n | [int] n specifies the number of rows and columns of C. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | pointer storing matrix A on the GPU. Martrix dimension is ( lda, k ) when if transA = HIPBLAS_OP_N, otherwise (lda, n) only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of A. if transA = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | pointer storing matrix C on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| hipblasStatus_t hipblasZsyrkBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex *const | AP[], | ||
| int | lda, | ||
| const hipblasDoubleComplex * | beta, | ||
| hipblasDoubleComplex *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
syrkBatched performs a batch of the matrix-matrix operations for a symmetric rank-k update
C_i := alpha*op( A_i )*op( A_i )^T + beta*C_i
where alpha and beta are scalars, op(A_i) is an n by k matrix, and C_i is a symmetric n x n matrix stored as either upper or lower.
op( A_i ) = A_i, and A_i is n by k if transA == HIPBLAS_OP_N op( A_i ) = A_i^T and A_i is k by n if transA == HIPBLAS_OP_T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C_i is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_T: op(A) = A^T HIPBLAS_OP_N: op(A) = A HIPBLAS_OP_C: op(A) = A^T |
HIPBLAS_OP_C is not supported for complex types, see cherk and zherk.
| [in] | n | [int] n specifies the number of rows and columns of C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | device array of device pointers storing each matrix_i A of dimension (lda, k) when transA is HIPBLAS_OP_N, otherwise of dimension (lda, n) |
| [in] | lda | [int] lda specifies the first dimension of A_i. if transA = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | device array of device pointers storing each matrix C_i on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZsyrkStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const hipblasDoubleComplex * | beta, | ||
| hipblasDoubleComplex * | CP, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
syrkStridedBatched performs a batch of the matrix-matrix operations for a symmetric rank-k update
C_i := alpha*op( A_i )*op( A_i )^T + beta*C_i
where alpha and beta are scalars, op(A_i) is an n by k matrix, and C_i is a symmetric n x n matrix stored as either upper or lower.
op( A_i ) = A_i, and A_i is n by k if transA == HIPBLAS_OP_N op( A_i ) = A_i^T and A_i is k by n if transA == HIPBLAS_OP_T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C_i is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_T: op(A) = A^T HIPBLAS_OP_N: op(A) = A HIPBLAS_OP_C: op(A) = A^T |
HIPBLAS_OP_C is not supported for complex types, see cherk and zherk.
| [in] | n | [int] n specifies the number of rows and columns of C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | Device pointer to the first matrix A_1 on the GPU of dimension (lda, k) when transA is HIPBLAS_OP_N, otherwise of dimension (lda, n) |
| [in] | lda | [int] lda specifies the first dimension of A_i. if transA = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | Device pointer to the first matrix C_1 on the GPU. on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| [in,out] | strideC | [hipblasStride] stride from the start of one matrix (C_i) and the next one (C_i+1) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZsyrkx | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| const hipblasDoubleComplex * | BP, | ||
| int | ldb, | ||
| const hipblasDoubleComplex * | beta, | ||
| hipblasDoubleComplex * | CP, | ||
| int | ldc | ||
| ) |
BLAS Level 3 API.
syrkx performs one of the matrix-matrix operations for a symmetric rank-k update
C := alpha*op( A )*op( B )^T + beta*C
where alpha and beta are scalars, op(A) and op(B) are n by k matrix, and C is a symmetric n x n matrix stored as either upper or lower. This routine should only be used when the caller can guarantee that the result of op( A )*op( B )^T will be symmetric.
op( A ) = A, op( B ) = B, and A and B are n by k if trans == HIPBLAS_OP_N op( A ) = A^T, op( B ) = B^T, and A and B are k by n if trans == HIPBLAS_OP_T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_T: op( A ) = A^T, op( B ) = B^T HIPBLAS_OP_N: op( A ) = A, op( B ) = B |
| [in] | n | [int] n specifies the number of rows and columns of C. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A) and op(B). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | pointer storing matrix A on the GPU. Martrix dimension is ( lda, k ) when if trans = HIPBLAS_OP_N, otherwise (lda, n) only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of A. if trans = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | BP | pointer storing matrix B on the GPU. Martrix dimension is ( ldb, k ) when if trans = HIPBLAS_OP_N, otherwise (ldb, n) only the upper/lower triangular part is accessed. |
| [in] | ldb | [int] ldb specifies the first dimension of B. if trans = HIPBLAS_OP_N, ldb >= max( 1, n ), otherwise ldb >= max( 1, k ). |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | pointer storing matrix C on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| hipblasStatus_t hipblasZsyrkxBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex *const | AP[], | ||
| int | lda, | ||
| const hipblasDoubleComplex *const | BP[], | ||
| int | ldb, | ||
| const hipblasDoubleComplex * | beta, | ||
| hipblasDoubleComplex *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
syrkxBatched performs a batch of the matrix-matrix operations for a symmetric rank-k update
C_i := alpha*op( A_i )*op( B_i )^T + beta*C_i
where alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrix, and C_i is a symmetric n x n matrix stored as either upper or lower. This routine should only be used when the caller can guarantee that the result of op( A_i )*op( B_i )^T will be symmetric.
op( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == HIPBLAS_OP_N op( A_i ) = A_i^T, op( B_i ) = B_i^T, and A_i and B_i are k by n if trans == HIPBLAS_OP_T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C_i is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_T: op( A_i ) = A_i^T, op( B_i ) = B_i^T HIPBLAS_OP_N: op( A_i ) = A_i, op( B_i ) = B_i |
| [in] | n | [int] n specifies the number of rows and columns of C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | device array of device pointers storing each matrix_i A of dimension (lda, k) when trans is HIPBLAS_OP_N, otherwise of dimension (lda, n) |
| [in] | lda | [int] lda specifies the first dimension of A_i. if trans = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | BP | device array of device pointers storing each matrix_i B of dimension (ldb, k) when trans is HIPBLAS_OP_N, otherwise of dimension (ldb, n) |
| [in] | ldb | [int] ldb specifies the first dimension of B. if trans = HIPBLAS_OP_N, ldb >= max( 1, n ), otherwise ldb >= max( 1, k ). |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | device array of device pointers storing each matrix C_i on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZsyrkxStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| int | n, | ||
| int | k, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const hipblasDoubleComplex * | BP, | ||
| int | ldb, | ||
| hipblasStride | strideB, | ||
| const hipblasDoubleComplex * | beta, | ||
| hipblasDoubleComplex * | CP, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
syrkxStridedBatched performs a batch of the matrix-matrix operations for a symmetric rank-k update
C_i := alpha*op( A_i )*op( B_i )^T + beta*C_i
where alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrix, and C_i is a symmetric n x n matrix stored as either upper or lower. This routine should only be used when the caller can guarantee that the result of op( A_i )*op( B_i )^T will be symmetric.
op( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == HIPBLAS_OP_N op( A_i ) = A_i^T, op( B_i ) = B_i^T, and A_i and B_i are k by n if trans == HIPBLAS_OP_T
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: C_i is an upper triangular matrix HIPBLAS_FILL_MODE_LOWER: C_i is a lower triangular matrix |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_T: op( A_i ) = A_i^T, op( B_i ) = B_i^T HIPBLAS_OP_N: op( A_i ) = A_i, op( B_i ) = B_i |
| [in] | n | [int] n specifies the number of rows and columns of C_i. n >= 0. |
| [in] | k | [int] k specifies the number of columns of op(A). k >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and A need not be set before entry. |
| [in] | AP | Device pointer to the first matrix A_1 on the GPU of dimension (lda, k) when trans is HIPBLAS_OP_N, otherwise of dimension (lda, n) |
| [in] | lda | [int] lda specifies the first dimension of A_i. if trans = HIPBLAS_OP_N, lda >= max( 1, n ), otherwise lda >= max( 1, k ). |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | BP | Device pointer to the first matrix B_1 on the GPU of dimension (ldb, k) when trans is HIPBLAS_OP_N, otherwise of dimension (ldb, n) |
| [in] | ldb | [int] ldb specifies the first dimension of B_i. if trans = HIPBLAS_OP_N, ldb >= max( 1, n ), otherwise ldb >= max( 1, k ). |
| [in] | strideB | [hipblasStride] stride from the start of one matrix (B_i) and the next one (B_i+1) |
| [in] | beta | beta specifies the scalar beta. When beta is zero then C need not be set before entry. |
| [in] | CP | Device pointer to the first matrix C_1 on the GPU. |
| [in] | ldc | [int] ldc specifies the first dimension of C. ldc >= max( 1, n ). |
| [in,out] | strideC | [hipblasStride] stride from the start of one matrix (C_i) and the next one (C_i+1) |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZsyrStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
syrStridedBatched performs the matrix-vector operations
A[i] := A[i] + alpha*x[i]*x[i]**T
where alpha is a scalar, vectors, and A is an array of n by n symmetric matrices, for i = 1 , ... , batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | n | [int] the number of rows and columns of each matrix A. |
| [in] | alpha | device pointer or host pointer to scalar alpha. |
| [in] | x | device pointer to the first vector x_1. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] specifies the pointer increment between vectors (x_i) and (x_i+1). |
| [in,out] | AP | device pointer to the first matrix A_1. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasZtbmv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | k, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| hipblasDoubleComplex * | x, | ||
| int | incx | ||
| ) |
BLAS Level 2 API.
tbmv performs one of the matrix-vector operations
x := A*x or x := A**T*x or x := A**H*x,
x is a vectors and A is a banded m by m matrix (see description below).
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper banded triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower banded triangular matrix. |
| [in] | transA | [hipblasOperation_t] indicates whether matrix A is tranposed (conjugated) or not. |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: The main diagonal of A is assumed to consist of only 1's and is not referenced. HIPBLAS_DIAG_NON_UNIT: No assumptions are made of A's main diagonal. |
| [in] | m | [int] the number of rows and columns of the matrix represented by A. |
| [in] | k | [int] if uplo == HIPBLAS_FILL_MODE_UPPER, k specifies the number of super-diagonals of the matrix A. if uplo == HIPBLAS_FILL_MODE_LOWER, k specifies the number of sub-diagonals of the matrix A. k must satisfy k > 0 && k < lda. |
| [in] | AP | device pointer storing banded triangular matrix A. if uplo == HIPBLAS_FILL_MODE_UPPER: The matrix represented is an upper banded triangular matrix with the main diagonal and k super-diagonals, everything else can be assumed to be 0. The matrix is compacted so that the main diagonal resides on the k'th row, the first super diagonal resides on the RHS of the k-1'th row, etc, with the k'th diagonal on the RHS of the 0'th row. Ex: (HIPBLAS_FILL_MODE_UPPER; m = 5; k = 2) 1 6 9 0 0 0 0 9 8 7 0 2 7 8 0 0 6 7 8 9 0 0 3 8 7 -—> 1 2 3 4 5 0 0 0 4 9 0 0 0 0 0 0 0 0 0 5 0 0 0 0 0 if uplo == HIPBLAS_FILL_MODE_LOWER: The matrix represnted is a lower banded triangular matrix with the main diagonal and k sub-diagonals, everything else can be assumed to be 0. The matrix is compacted so that the main diagonal resides on the 0'th row, working up to the k'th diagonal residing on the LHS of the k'th row. Ex: (HIPBLAS_FILL_MODE_LOWER; m = 5; k = 2) 1 0 0 0 0 1 2 3 4 5 6 2 0 0 0 6 7 8 9 0 9 7 3 0 0 -—> 9 8 7 0 0 0 8 8 4 0 0 0 0 0 0 0 0 7 9 5 0 0 0 0 0 |
| [in] | lda | [int] specifies the leading dimension of A. lda must satisfy lda > k. |
| [in,out] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| hipblasStatus_t hipblasZtbmvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | k, | ||
| const hipblasDoubleComplex *const | AP[], | ||
| int | lda, | ||
| hipblasDoubleComplex *const | x[], | ||
| int | incx, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
tbmvBatched performs one of the matrix-vector operations
x_i := A_i*x_i or x_i := A_i**T*x_i or x_i := A_i**H*x_i,
where (A_i, x_i) is the i-th instance of the batch. x_i is a vector and A_i is an m by m matrix, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: each A_i is an upper banded triangular matrix. HIPBLAS_FILL_MODE_LOWER: each A_i is a lower banded triangular matrix. |
| [in] | transA | [hipblasOperation_t] indicates whether each matrix A_i is tranposed (conjugated) or not. |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: The main diagonal of each A_i is assumed to consist of only 1's and is not referenced. HIPBLAS_DIAG_NON_UNIT: No assumptions are made of each A_i's main diagonal. |
| [in] | m | [int] the number of rows and columns of the matrix represented by each A_i. |
| [in] | k | [int] if uplo == HIPBLAS_FILL_MODE_UPPER, k specifies the number of super-diagonals of each matrix A_i. if uplo == HIPBLAS_FILL_MODE_LOWER, k specifies the number of sub-diagonals of each matrix A_i. k must satisfy k > 0 && k < lda. |
| [in] | AP | device array of device pointers storing each banded triangular matrix A_i. if uplo == HIPBLAS_FILL_MODE_UPPER: The matrix represented is an upper banded triangular matrix with the main diagonal and k super-diagonals, everything else can be assumed to be 0. The matrix is compacted so that the main diagonal resides on the k'th row, the first super diagonal resides on the RHS of the k-1'th row, etc, with the k'th diagonal on the RHS of the 0'th row. Ex: (HIPBLAS_FILL_MODE_UPPER; m = 5; k = 2) 1 6 9 0 0 0 0 9 8 7 0 2 7 8 0 0 6 7 8 9 0 0 3 8 7 -—> 1 2 3 4 5 0 0 0 4 9 0 0 0 0 0 0 0 0 0 5 0 0 0 0 0 if uplo == HIPBLAS_FILL_MODE_LOWER: The matrix represnted is a lower banded triangular matrix with the main diagonal and k sub-diagonals, everything else can be assumed to be 0. The matrix is compacted so that the main diagonal resides on the 0'th row, working up to the k'th diagonal residing on the LHS of the k'th row. Ex: (HIPBLAS_FILL_MODE_LOWER; m = 5; k = 2) 1 0 0 0 0 1 2 3 4 5 6 2 0 0 0 6 7 8 9 0 9 7 3 0 0 -—> 9 8 7 0 0 0 8 8 4 0 0 0 0 0 0 0 0 7 9 5 0 0 0 0 0 |
| [in] | lda | [int] specifies the leading dimension of each A_i. lda must satisfy lda > k. |
| [in,out] | x | device array of device pointer storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZtbmvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | k, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
tbmvStridedBatched performs one of the matrix-vector operations
x_i := A_i*x_i or x_i := A_i**T*x_i or x_i := A_i**H*x_i,
where (A_i, x_i) is the i-th instance of the batch. x_i is a vector and A_i is an m by m matrix, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: each A_i is an upper banded triangular matrix. HIPBLAS_FILL_MODE_LOWER: each A_i is a lower banded triangular matrix. |
| [in] | transA | [hipblasOperation_t] indicates whether each matrix A_i is tranposed (conjugated) or not. |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: The main diagonal of each A_i is assumed to consist of only 1's and is not referenced. HIPBLAS_DIAG_NON_UNIT: No assumptions are made of each A_i's main diagonal. |
| [in] | m | [int] the number of rows and columns of the matrix represented by each A_i. |
| [in] | k | [int] if uplo == HIPBLAS_FILL_MODE_UPPER, k specifies the number of super-diagonals of each matrix A_i. if uplo == HIPBLAS_FILL_MODE_LOWER, k specifies the number of sub-diagonals of each matrix A_i. k must satisfy k > 0 && k < lda. |
| [in] | AP | device array to the first matrix A_i of the batch. Stores each banded triangular matrix A_i. if uplo == HIPBLAS_FILL_MODE_UPPER: The matrix represented is an upper banded triangular matrix with the main diagonal and k super-diagonals, everything else can be assumed to be 0. The matrix is compacted so that the main diagonal resides on the k'th row, the first super diagonal resides on the RHS of the k-1'th row, etc, with the k'th diagonal on the RHS of the 0'th row. Ex: (HIPBLAS_FILL_MODE_UPPER; m = 5; k = 2) 1 6 9 0 0 0 0 9 8 7 0 2 7 8 0 0 6 7 8 9 0 0 3 8 7 -—> 1 2 3 4 5 0 0 0 4 9 0 0 0 0 0 0 0 0 0 5 0 0 0 0 0 if uplo == HIPBLAS_FILL_MODE_LOWER: The matrix represnted is a lower banded triangular matrix with the main diagonal and k sub-diagonals, everything else can be assumed to be 0. The matrix is compacted so that the main diagonal resides on the 0'th row, working up to the k'th diagonal residing on the LHS of the k'th row. Ex: (HIPBLAS_FILL_MODE_LOWER; m = 5; k = 2) 1 0 0 0 0 1 2 3 4 5 6 2 0 0 0 6 7 8 9 0 9 7 3 0 0 -—> 9 8 7 0 0 0 8 8 4 0 0 0 0 0 0 0 0 7 9 5 0 0 0 0 0 |
| [in] | lda | [int] specifies the leading dimension of each A_i. lda must satisfy lda > k. |
| [in] | strideA | [hipblasStride] stride from the start of one A_i matrix to the next A_(i + 1). |
| [in,out] | x | device array to the first vector x_i of the batch. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the start of one x_i matrix to the next x_(i + 1). |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZtbsv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | n, | ||
| int | k, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| hipblasDoubleComplex * | x, | ||
| int | incx | ||
| ) |
BLAS Level 2 API.
tbsv solves
A*x = b or A**T*x = b or A**H*x = b,
where x and b are vectors and A is a banded triangular matrix.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: Solves A*x = b HIPBLAS_OP_T: Solves A**T*x = b HIPBLAS_OP_C: Solves A**H*x = b |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular (i.e. the diagonal elements of A are not used in computations). HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | n | [int] n specifies the number of rows of b. n >= 0. |
| [in] | k | [int] if(uplo == HIPBLAS_FILL_MODE_UPPER) k specifies the number of super-diagonals of A. if(uplo == HIPBLAS_FILL_MODE_LOWER) k specifies the number of sub-diagonals of A. k >= 0. |
| [in] | AP | device pointer storing the matrix A in banded format. |
| [in] | lda | [int] specifies the leading dimension of A. lda >= (k + 1). |
| [in,out] | x | device pointer storing input vector b. Overwritten by the output vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| hipblasStatus_t hipblasZtbsvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | n, | ||
| int | k, | ||
| const hipblasDoubleComplex *const | AP[], | ||
| int | lda, | ||
| hipblasDoubleComplex *const | x[], | ||
| int | incx, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
tbsvBatched solves
A_i*x_i = b_i or A_i**T*x_i = b_i or A_i**H*x_i = b_i,
where x_i and b_i are vectors and A_i is a banded triangular matrix, for i = [1, batchCount].
The input vectors b_i are overwritten by the output vectors x_i.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: Solves A_i*x_i = b_i HIPBLAS_OP_T: Solves A_i**T*x_i = b_i HIPBLAS_OP_C: Solves A_i**H*x_i = b_i |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: each A_i is assumed to be unit triangular (i.e. the diagonal elements of each A_i are not used in computations). HIPBLAS_DIAG_NON_UNIT: each A_i is not assumed to be unit triangular. |
| [in] | n | [int] n specifies the number of rows of each b_i. n >= 0. |
| [in] | k | [int] if(uplo == HIPBLAS_FILL_MODE_UPPER) k specifies the number of super-diagonals of each A_i. if(uplo == HIPBLAS_FILL_MODE_LOWER) k specifies the number of sub-diagonals of each A_i. k >= 0. |
| [in] | AP | device vector of device pointers storing each matrix A_i in banded format. |
| [in] | lda | [int] specifies the leading dimension of each A_i. lda >= (k + 1). |
| [in,out] | x | device vector of device pointers storing each input vector b_i. Overwritten by each output vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZtbsvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | n, | ||
| int | k, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
tbsvStridedBatched solves
A_i*x_i = b_i or A_i**T*x_i = b_i or A_i**H*x_i = b_i,
where x_i and b_i are vectors and A_i is a banded triangular matrix, for i = [1, batchCount].
The input vectors b_i are overwritten by the output vectors x_i.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: Solves A_i*x_i = b_i HIPBLAS_OP_T: Solves A_i**T*x_i = b_i HIPBLAS_OP_C: Solves A_i**H*x_i = b_i |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: each A_i is assumed to be unit triangular (i.e. the diagonal elements of each A_i are not used in computations). HIPBLAS_DIAG_NON_UNIT: each A_i is not assumed to be unit triangular. |
| [in] | n | [int] n specifies the number of rows of each b_i. n >= 0. |
| [in] | k | [int] if(uplo == HIPBLAS_FILL_MODE_UPPER) k specifies the number of super-diagonals of each A_i. if(uplo == HIPBLAS_FILL_MODE_LOWER) k specifies the number of sub-diagonals of each A_i. k >= 0. |
| [in] | AP | device pointer pointing to the first banded matrix A_1. |
| [in] | lda | [int] specifies the leading dimension of each A_i. lda >= (k + 1). |
| [in] | strideA | [hipblasStride] specifies the distance between the start of one matrix (A_i) and the next (A_i+1). |
| [in,out] | x | device pointer pointing to the first input vector b_1. Overwritten by output vectors x. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] specifies the distance between the start of one vector (x_i) and the next (x_i+1). |
| [in] | batchCount | [int] number of instances in the batch. |
| hipblasStatus_t hipblasZtpmv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const hipblasDoubleComplex * | AP, | ||
| hipblasDoubleComplex * | x, | ||
| int | incx | ||
| ) |
BLAS Level 2 API.
tpmv performs one of the matrix-vector operations
x = A*x or x = A**T*x,
where x is an n element vector and A is an n by n unit, or non-unit, upper or lower triangular matrix, supplied in the pack form.
The vector x is overwritten.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of A. m >= 0. |
| [in] | AP | device pointer storing matrix A, of dimension at leat ( m * ( m + 1 ) / 2 ). Before entry with uplo = HIPBLAS_FILL_MODE_UPPER, the array A must contain the upper triangular matrix packed sequentially, column by column, so that A[0] contains a_{0,0}, A[1] and A[2] contain a_{0,1} and a_{1, 1} respectively, and so on. Before entry with uplo = HIPBLAS_FILL_MODE_LOWER, the array A must contain the lower triangular matrix packed sequentially, column by column, so that A[0] contains a_{0,0}, A[1] and A[2] contain a_{1,0} and a_{2,0} respectively, and so on. Note that when DIAG = HIPBLAS_DIAG_UNIT, the diagonal elements of A are not referenced, but are assumed to be unity. |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. incx must not be zero. |
| hipblasStatus_t hipblasZtpmvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const hipblasDoubleComplex *const | AP[], | ||
| hipblasDoubleComplex *const | x[], | ||
| int | incx, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
tpmvBatched performs one of the matrix-vector operations
x_i = A_i*x_i or x_i = A**T*x_i, 0 \le i < batchCount
where x_i is an n element vector and A_i is an n by n (unit, or non-unit, upper or lower triangular matrix)
The vectors x_i are overwritten.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A_i is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of matrices A_i. m >= 0. |
| [in] | AP | device pointer storing pointer of matrices A_i, of dimension ( lda, m ) |
| [in] | x | device pointer storing vectors x_i. |
| [in] | incx | [int] specifies the increment for the elements of vectors x_i. |
| [in] | batchCount | [int] The number of batched matrices/vectors. |
| hipblasStatus_t hipblasZtpmvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const hipblasDoubleComplex * | AP, | ||
| hipblasStride | strideA, | ||
| hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
tpmvStridedBatched performs one of the matrix-vector operations
x_i = A_i*x_i or x_i = A**T*x_i, 0 \le i < batchCount
where x_i is an n element vector and A_i is an n by n (unit, or non-unit, upper or lower triangular matrix) with strides specifying how to retrieve $x_i$ (resp. $A_i$) from $x_{i-1}$ (resp. $A_i$).
The vectors x_i are overwritten.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A_i is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of matrices A_i. m >= 0. |
| [in] | AP | device pointer of the matrix A_0, of dimension ( lda, m ) |
| [in] | strideA | [hipblasStride] stride from the start of one A_i matrix to the next A_{i + 1} |
| [in] | x | device pointer storing the vector x_0. |
| [in] | incx | [int] specifies the increment for the elements of one vector x. |
| [in] | stridex | [hipblasStride] stride from the start of one x_i vector to the next x_{i + 1} |
| [in] | batchCount | [int] The number of batched matrices/vectors. |
| hipblasStatus_t hipblasZtpsv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const hipblasDoubleComplex * | AP, | ||
| hipblasDoubleComplex * | x, | ||
| int | incx | ||
| ) |
BLAS Level 2 API.
tpsv solves
A*x = b or A**T*x = b, or A**H*x = b,
where x and b are vectors and A is a triangular matrix stored in the packed format.
The input vector b is overwritten by the output vector x.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: Solves A*x = b HIPBLAS_OP_T: Solves A**T*x = b HIPBLAS_OP_C: Solves A**H*x = b |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular (i.e. the diagonal elements of A are not used in computations). HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of b. m >= 0. |
| [in] | AP | device pointer storing the packed version of matrix A, of dimension >= (n * (n + 1) / 2) |
| [in,out] | x | device pointer storing vector b on input, overwritten by x on output. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| hipblasStatus_t hipblasZtpsvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const hipblasDoubleComplex *const | AP[], | ||
| hipblasDoubleComplex *const | x[], | ||
| int | incx, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
tpsvBatched solves
A_i*x_i = b_i or A_i**T*x_i = b_i, or A_i**H*x_i = b_i,
where x_i and b_i are vectors and A_i is a triangular matrix stored in the packed format, for i in [1, batchCount].
The input vectors b_i are overwritten by the output vectors x_i.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: each A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: each A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: Solves A*x = b HIPBLAS_OP_T: Solves A**T*x = b HIPBLAS_OP_C: Solves A**H*x = b |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: each A_i is assumed to be unit triangular (i.e. the diagonal elements of each A_i are not used in computations). HIPBLAS_DIAG_NON_UNIT: each A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of each b_i. m >= 0. |
| [in] | AP | device array of device pointers storing the packed versions of each matrix A_i, of dimension >= (n * (n + 1) / 2) |
| [in,out] | x | device array of device pointers storing each input vector b_i, overwritten by x_i on output. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | batchCount | [int] specifies the number of instances in the batch. |
| hipblasStatus_t hipblasZtpsvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const hipblasDoubleComplex * | AP, | ||
| hipblasStride | strideA, | ||
| hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
tpsvStridedBatched solves
A_i*x_i = b_i or A_i**T*x_i = b_i, or A_i**H*x_i = b_i,
where x_i and b_i are vectors and A_i is a triangular matrix stored in the packed format, for i in [1, batchCount].
The input vectors b_i are overwritten by the output vectors x_i.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: each A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: each A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: Solves A*x = b HIPBLAS_OP_T: Solves A**T*x = b HIPBLAS_OP_C: Solves A**H*x = b |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: each A_i is assumed to be unit triangular (i.e. the diagonal elements of each A_i are not used in computations). HIPBLAS_DIAG_NON_UNIT: each A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of each b_i. m >= 0. |
| [in] | AP | device pointer pointing to the first packed matrix A_1, of dimension >= (n * (n + 1) / 2) |
| [in] | strideA | [hipblasStride] stride from the beginning of one packed matrix (AP_i) and the next (AP_i+1). |
| [in,out] | x | device pointer pointing to the first input vector b_1. Overwritten by each x_i on output. |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | stridex | [hipblasStride] stride from the beginning of one vector (x_i) and the next (x_i+1). |
| [in] | batchCount | [int] specifies the number of instances in the batch. |
| hipblasStatus_t hipblasZtrmm | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| hipblasDoubleComplex * | BP, | ||
| int | ldb | ||
| ) |
BLAS Level 3 API.
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasStrmm(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const float* alpha,
const float* AP,
int lda,
float* BP,
int ldb);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasStrmmOutofplace(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const float* alpha,
const float* AP,
int lda,
const float* BP,
int ldb,
float* CP,
int ldc);
#endif
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasDtrmm(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const double* alpha,
const double* AP,
int lda,
double* BP,
int ldb);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasDtrmmOutofplace(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const double* alpha,
const double* AP,
int lda,
const double* BP,
int ldb,
double* CP,
int ldc);
#endif
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasCtrmm(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasComplex* alpha,
const hipblasComplex* AP,
int lda,
hipblasComplex* BP,
int ldb);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasCtrmmOutofplace(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasComplex* alpha,
const hipblasComplex* AP,
int lda,
const hipblasComplex* BP,
int ldb,
hipblasComplex* CP,
int ldc);
#endif
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasZtrmm(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasDoubleComplex* alpha,
const hipblasDoubleComplex* AP,
int lda,
hipblasDoubleComplex* BP,
int ldb);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasZtrmmOutofplace(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasDoubleComplex* alpha,
const hipblasDoubleComplex* AP,
int lda,
const hipblasDoubleComplex* BP,
int ldb,
hipblasDoubleComplex* CP,
int ldc);
#endif
The deprecated Legacy BLAS in-place trmm performs one of the matrix-matrix operations:
B := alpha*op( A )*B, or B := alpha*B*op( A ),
The new trmm performs one of the matrix-matrix operations:
C := alpha*op( A )*B, or C := alpha*B*op( A ),
The in-place functionality is still available in the new trmmm by setting pointer C equal to pointer B, and ldc equal to ldb.
alpha is a scalar, B is an m by n matrix, C is an m by n matrix, A is a unit, or non-unit, upper or lower triangular matrix and op( A ) is one of op( A ) = A or op( A ) = A^T or op( A ) = A^H. When uplo == rocblas_fill_upper the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Here k is m when side == rocblas_side_left and is n when side == rocblas_side_right. When uplo == rocblas_fill_lower the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Here k is m when side == rocblas_side_left and is n when side == rocblas_side_right. Note that when diag == rocblas_diagonal_unit the diagonal elements of A are not referenced either, but are assumed to be unity.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] Specifies whether op(A) multiplies B from the left or right as follows: HIPBLAS_SIDE_LEFT: B := alpha*op( A )*B. HIPBLAS_SIDE_RIGHT: B := alpha*B*op( A ). |
| [in] | uplo | [hipblasFillMode_t] Specifies whether the matrix A is an upper or lower triangular matrix as follows: HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] Specifies the form of op(A) to be used in the matrix multiplication as follows: HIPBLAS_OP_N: op(A) = A. HIPBLAS_OP_T: op(A) = A^T. HIPBLAS_OP_C: op(A) = A^H. |
| [in] | diag | [hipblasDiagType_t] Specifies whether or not A is unit triangular as follows: HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of B. m >= 0. |
| [in] | n | [int] n specifies the number of columns of B. n >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. |
| [in] | AP | Device pointer to matrix A on the GPU. A has dimension ( lda, k ), where k is m when side == HIPBLAS_SIDE_LEFT and is n when side == HIPBLAS_SIDE_RIGHT. |
When uplo == HIPBLAS_FILL_MODE_UPPER the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced.
When uplo == HIPBLAS_FILL_MODE_LOWER the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced.
Note that when diag == HIPBLAS_DIAG_UNIT the diagonal elements of A are not referenced either, but are assumed to be unity.
| [in] | lda | [int] lda specifies the first dimension of A. if side == HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), if side == HIPBLAS_SIDE_RIGHT, lda >= max( 1, n ). |
| [in,out] | BP | Device pointer to the first matrix B_0 on the GPU. On entry, the leading m by n part of the array B must contain the matrix B, and on exit is overwritten by the transformed matrix. |
| [in] | ldb | [int] ldb specifies the first dimension of B. ldb >= max( 1, m ). |
| hipblasStatus_t hipblasZtrmmBatched | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex *const | AP[], | ||
| int | lda, | ||
| hipblasDoubleComplex *const | BP[], | ||
| int | ldb, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
The hipBLAS trmm_batched API is from Legacy BLAS and it supports only in-place functionality. It is deprecated and it will be replaced with an API that supports both in-place and out-of-place functionality. The new API is available in hipBLAS versions 1.x.x and later. To get the new API compile with the directive -DHIPBLAS_V1.
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasStrmmBatched(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const float* alpha,
const float* const AP[],
int lda,
float* const BP[],
int ldb,
int batchCount);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasStrmmBatchedOutofplace(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const float* alpha,
const float* const AP[],
int lda,
const float* const BP[],
int ldb,
float* const CP[],
int ldc,
int batchCount);
#endif
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasDtrmmBatched(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const double* alpha,
const double* const AP[],
int lda,
double* const BP[],
int ldb,
int batchCount);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasDtrmmBatchedOutofplace(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const double* alpha,
const double* const AP[],
int lda,
const double* const BP[],
int ldb,
double* const CP[],
int ldc,
int batchCount);
#endif
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasCtrmmBatched(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasComplex* alpha,
const hipblasComplex* const AP[],
int lda,
hipblasComplex* const BP[],
int ldb,
int batchCount);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasCtrmmBatchedOutofplace(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasComplex* alpha,
const hipblasComplex* const AP[],
int lda,
const hipblasComplex* const BP[],
int ldb,
hipblasComplex* const CP[],
int ldc,
int batchCount);
#endif
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasZtrmmBatched(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasDoubleComplex* alpha,
const hipblasDoubleComplex* const AP[],
int lda,
hipblasDoubleComplex* const BP[],
int ldb,
int batchCount);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasZtrmmBatchedOutofplace(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasDoubleComplex* alpha,
const hipblasDoubleComplex* const AP[],
int lda,
const hipblasDoubleComplex* const BP[],
int ldb,
hipblasDoubleComplex* const CP[],
int ldc,
int batchCount);
#endif
The deprecated Legacy BLAS in-place trmm_batched performs one of the batched matrix-matrix operations:
B_i := alpha*op( A_i )*B_i, or B_i := alpha*B_i*op( A_i ) for i = 0, 1, ... batch_count -1,
The new trmm_batched performs one of the matrix-matrix operations:
C_i := alpha*op( A_i )*B_i, or C_i := alpha*B_i*op( A_i ) for i = 0, 1, ... batch_count -1,
The in-place functionality is still available in the new trmmm_batched by setting pointer C equal to pointer B and ldc equal to ldb.
alpha is a scalar, B_i is an m by n matrix, C_i is an m by n matrix, A_i is a unit, or non-unit, upper or lower triangular matrix and op( A_i ) is one of op( A_i ) = A_i or op( A_i ) = A_i^T or op( A_i ) = A_i^H.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] Specifies whether op(A_i) multiplies B_i from the left or right as follows: HIPBLAS_SIDE_LEFT: B_i := alpha*op( A_i )*B_i. HIPBLAS_SIDE_RIGHT: B_i := alpha*B_i*op( A_i ). |
| [in] | uplo | [hipblasFillMode_t] Specifies whether the matrix A is an upper or lower triangular matrix as follows: HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] Specifies the form of op(A_i) to be used in the matrix multiplication as follows: HIPBLAS_OP_N: op(A_i) = A_i. HIPBLAS_OP_T: op(A_i) = A_i^T. HIPBLAS_OP_C: op(A_i) = A_i^H. |
| [in] | diag | [hipblasDiagType_t] Specifies whether or not A_i is unit triangular as follows: HIPBLAS_DIAG_UNIT: A_i is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of B_i. m >= 0. |
| [in] | n | [int] n specifies the number of columns of B_i. n >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A_i is not referenced and B_i need not be set before entry. |
| [in] | AP | Device array of device pointers storing each matrix A_i on the GPU. Each A_i is of dimension ( lda, k ), where k is m when side == HIPBLAS_SIDE_LEFT and is n when side == HIPBLAS_SIDE_RIGHT. |
When uplo == HIPBLAS_FILL_MODE_UPPER the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced.
When uplo == HIPBLAS_FILL_MODE_LOWER the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced.
Note that when diag == HIPBLAS_DIAG_UNIT the diagonal elements of A_i are not referenced either, but are assumed to be unity.
| [in] | lda | [int] lda specifies the first dimension of A. if side == HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), if side == HIPBLAS_SIDE_RIGHT, lda >= max( 1, n ). |
| [in,out] | BP | device array of device pointers storing each matrix B_i on the GPU. On entry, the leading m by n part of the array B_i must contain the matrix B_i, and on exit is overwritten by the transformed matrix. |
| [in] | ldb | [int] ldb specifies the first dimension of B_i. ldb >= max( 1, m ). |
| [in] | batchCount | [int] number of instances i in the batch. |
| hipblasStatus_t hipblasZtrmmBatchedOutofplace | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex *const | AP[], | ||
| int | lda, | ||
| const hipblasDoubleComplex *const | BP[], | ||
| int | ldb, | ||
| hipblasDoubleComplex *const | CP[], | ||
| int | ldc, | ||
| int | batchCount | ||
| ) |
| hipblasStatus_t hipblasZtrmmOutofplace | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| const hipblasDoubleComplex * | BP, | ||
| int | ldb, | ||
| hipblasDoubleComplex * | CP, | ||
| int | ldc | ||
| ) |
| hipblasStatus_t hipblasZtrmmStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| hipblasDoubleComplex * | BP, | ||
| int | ldb, | ||
| hipblasStride | strideB, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
The hipBLAS trmm_strided_batched API is from Legacy BLAS and it supports only in-place functionality. It is deprecated and it will be replaced with an API that supports both in-place and out-of-place functionality. The new API is available in hipBLAS versions 1.x.x and later. To get the new API compile with the directive -DHIPBLAS_V1.
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasStrmmStridedBatched(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const float* alpha,
const float* AP,
int lda,
hipblasStride strideA,
float* BP,
int ldb,
hipblasStride strideB,
int batchCount);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasStrmmStridedBatchedOutofplace(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const float* alpha,
const float* AP,
int lda,
hipblasStride strideA,
const float* BP,
int ldb,
hipblasStride strideB,
float* CP,
int ldc,
hipblasStride strideC,
int batchCount);
#endif
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasDtrmmStridedBatched(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const double* alpha,
const double* AP,
int lda,
hipblasStride strideA,
double* BP,
int ldb,
hipblasStride strideB,
int batchCount);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasDtrmmStridedBatchedOutofplace(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const double* alpha,
const double* AP,
int lda,
hipblasStride strideA,
const double* BP,
int ldb,
hipblasStride strideB,
double* CP,
int ldc,
hipblasStride strideC,
int batchCount);
#endif
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasCtrmmStridedBatched(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasComplex* alpha,
const hipblasComplex* AP,
int lda,
hipblasStride strideA,
hipblasComplex* BP,
int ldb,
hipblasStride strideB,
int batchCount);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasCtrmmStridedBatchedOutofplace(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasComplex* alpha,
const hipblasComplex* AP,
int lda,
hipblasStride strideA,
const hipblasComplex* BP,
int ldb,
hipblasStride strideB,
hipblasComplex* CP,
int ldc,
hipblasStride strideC,
int batchCount);
#endif
#ifndef HIPBLAS_V1 // deprecated
hipblasStatus_t hipblasZtrmmStridedBatched(hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasDoubleComplex* alpha,
const hipblasDoubleComplex* AP,
int lda,
hipblasStride strideA,
hipblasDoubleComplex* BP,
int ldb,
hipblasStride strideB,
int batchCount);
#else // available in hipBLAS version 1.x.x and later with -DHIPBLAS_V1
hipblasStatus_t hipblasZtrmmStridedBatchedOutofplace(
hipblasHandle_t handle,
hipblasSideMode_t side,
hipblasFillMode_t uplo,
hipblasOperation_t transA,
hipblasDiagType_t diag,
int m,
int n,
const hipblasDoubleComplex* alpha,
const hipblasDoubleComplex* AP,
int lda,
hipblasStride strideA,
const hipblasDoubleComplex* BP,
int ldb,
hipblasStride strideB,
hipblasDoubleComplex* BC,
int ldc,
hipblasStride strideC,
int batchCount);
#endif
The deprecated Legacy BLAS in-place trmm_strided_batched performs one of the strided_batched matrix-matrix operations:
B_i := alpha*op( A_i )*B_i, or B_i := alpha*B_i*op( A_i ) for i = 0, 1, ... batch_count -1,
The new trmm_batched performs one of the matrix-matrix operations:
C_i := alpha*op( A_i )*B_i, or C_i := alpha*B_i*op( A_i ) for i = 0, 1, ... batch_count -1,
The in-place functionality is still available in the new trmmm_batched by setting pointer C equal to pointer B, setting ldc equal to ldb, and setting stride_C equal to stride_B.
alpha is a scalar, B_i is an m by n matrix, C_i is an m by n matrix, A_i is a unit, or non-unit, upper or lower triangular matrix and op( A_i ) is one of op( A_i ) = A_i or op( A_i ) = A_i^T or op( A_i ) = A_i^H.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] Specifies whether op(A_i) multiplies B_i from the left or right as follows: HIPBLAS_SIDE_LEFT: B_i := alpha*op( A_i )*B_i. HIPBLAS_SIDE_RIGHT: B_i := alpha*B_i*op( A_i ). |
| [in] | uplo | [hipblasFillMode_t] Specifies whether the matrix A is an upper or lower triangular matrix as follows: HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] Specifies the form of op(A_i) to be used in the matrix multiplication as follows: HIPBLAS_OP_N: op(A_i) = A_i. HIPBLAS_OP_T: op(A_i) = A_i^T. HIPBLAS_OP_C: op(A_i) = A_i^H. |
| [in] | diag | [hipblasDiagType_t] Specifies whether or not A_i is unit triangular as follows: HIPBLAS_DIAG_UNIT: A_i is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of B_i. m >= 0. |
| [in] | n | [int] n specifies the number of columns of B_i. n >= 0. |
| [in] | alpha | alpha specifies the scalar alpha. When alpha is zero then A_i is not referenced and B_i need not be set before entry. |
| [in] | AP | Device pointer to the first matrix A_0 on the GPU. Each A_i is of dimension ( lda, k ), where k is m when side == HIPBLAS_SIDE_LEFT and is n when side == HIPBLAS_SIDE_RIGHT. |
When uplo == HIPBLAS_FILL_MODE_UPPER the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced.
When uplo == HIPBLAS_FILL_MODE_LOWER the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced.
Note that when diag == HIPBLAS_DIAG_UNIT the diagonal elements of A_i are not referenced either, but are assumed to be unity.
| [in] | lda | [int] lda specifies the first dimension of A. if side == HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), if side == HIPBLAS_SIDE_RIGHT, lda >= max( 1, n ). |
| [in] | strideA | [hipblasStride] stride from the start of one matrix (A_i) and the next one (A_i+1) |
| [in,out] | BP | Device pointer to the first matrix B_0 on the GPU. On entry, the leading m by n part of the array B_i must contain the matrix B_i, and on exit is overwritten by the transformed matrix. |
| [in] | ldb | [int] ldb specifies the first dimension of B_i. ldb >= max( 1, m ). |
| [in] | strideB | [hipblasStride] stride from the start of one matrix (B_i) and the next one (B_i+1) |
| [in] | batchCount | [int] number of instances i in the batch. |
| hipblasStatus_t hipblasZtrmmStridedBatchedOutofplace | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| const hipblasDoubleComplex * | BP, | ||
| int | ldb, | ||
| hipblasStride | strideB, | ||
| hipblasDoubleComplex * | BC, | ||
| int | ldc, | ||
| hipblasStride | strideC, | ||
| int | batchCount | ||
| ) |
| hipblasStatus_t hipblasZtrmv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| hipblasDoubleComplex * | x, | ||
| int | incx | ||
| ) |
BLAS Level 2 API.
trmv performs one of the matrix-vector operations
x = A*x or x = A**T*x,
where x is an n element vector and A is an n by n unit, or non-unit, upper or lower triangular matrix.
The vector x is overwritten.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of A. m >= 0. |
| [in] | AP | device pointer storing matrix A, of dimension ( lda, m ) |
| [in] | lda | [int] specifies the leading dimension of A. lda = max( 1, m ). |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| hipblasStatus_t hipblasZtrmvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const hipblasDoubleComplex *const | AP[], | ||
| int | lda, | ||
| hipblasDoubleComplex *const | x[], | ||
| int | incx, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
trmvBatched performs one of the matrix-vector operations
x_i = A_i*x_i or x_i = A**T*x_i, 0 \le i < batchCount
where x_i is an n element vector and A_i is an n by n (unit, or non-unit, upper or lower triangular matrix)
The vectors x_i are overwritten.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A_i is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of matrices A_i. m >= 0. |
| [in] | AP | device pointer storing pointer of matrices A_i, of dimension ( lda, m ) |
| [in] | lda | [int] specifies the leading dimension of A_i. lda >= max( 1, m ). |
| [in] | x | device pointer storing vectors x_i. |
| [in] | incx | [int] specifies the increment for the elements of vectors x_i. |
| [in] | batchCount | [int] The number of batched matrices/vectors. |
| hipblasStatus_t hipblasZtrmvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
trmvStridedBatched performs one of the matrix-vector operations
x_i = A_i*x_i or x_i = A**T*x_i, 0 \le i < batchCount
where x_i is an n element vector and A_i is an n by n (unit, or non-unit, upper or lower triangular matrix) with strides specifying how to retrieve $x_i$ (resp. $A_i$) from $x_{i-1}$ (resp. $A_i$).
The vectors x_i are overwritten.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A_i is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of matrices A_i. m >= 0. |
| [in] | AP | device pointer of the matrix A_0, of dimension ( lda, m ) |
| [in] | lda | [int] specifies the leading dimension of A_i. lda >= max( 1, m ). |
| [in] | strideA | [hipblasStride] stride from the start of one A_i matrix to the next A_{i + 1} |
| [in] | x | device pointer storing the vector x_0. |
| [in] | incx | [int] specifies the increment for the elements of one vector x. |
| [in] | stridex | [hipblasStride] stride from the start of one x_i vector to the next x_{i + 1} |
| [in] | batchCount | [int] The number of batched matrices/vectors. |
| hipblasStatus_t hipblasZtrsm | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| hipblasDoubleComplex * | BP, | ||
| int | ldb | ||
| ) |
BLAS Level 3 API.
trsm solves
op(A)*X = alpha*B or X*op(A) = alpha*B,
where alpha is a scalar, X and B are m by n matrices, A is triangular matrix and op(A) is one of
op( A ) = A or op( A ) = A^T or op( A ) = A^H.
The matrix X is overwritten on B.
Note about memory allocation: When trsm is launched with a k evenly divisible by the internal block size of 128, and is no larger than 10 of these blocks, the API takes advantage of utilizing pre-allocated memory found in the handle to increase overall performance. This memory can be managed by using the environment variable WORKBUF_TRSM_B_CHNK. When this variable is not set the device memory used for temporary storage will default to 1 MB and may result in chunking, which in turn may reduce performance. Under these circumstances it is recommended that WORKBUF_TRSM_B_CHNK be set to the desired chunk of right hand sides to be used at a time.
(where k is m when HIPBLAS_SIDE_LEFT and is n when HIPBLAS_SIDE_RIGHT)
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] HIPBLAS_SIDE_LEFT: op(A)*X = alpha*B. HIPBLAS_SIDE_RIGHT: X*op(A) = alpha*B. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: op(A) = A. HIPBLAS_OP_T: op(A) = A^T. HIPBLAS_OP_C: op(A) = A^H. |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of B. m >= 0. |
| [in] | n | [int] n specifies the number of columns of B. n >= 0. |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. When alpha is &zero then A is not referenced and B need not be set before entry. |
| [in] | AP | device pointer storing matrix A. of dimension ( lda, k ), where k is m when HIPBLAS_SIDE_LEFT and is n when HIPBLAS_SIDE_RIGHT only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of A. if side = HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), if side = HIPBLAS_SIDE_RIGHT, lda >= max( 1, n ). |
| [in,out] | BP | device pointer storing matrix B. |
| [in] | ldb | [int] ldb specifies the first dimension of B. ldb >= max( 1, m ). |
| hipblasStatus_t hipblasZtrsmBatched | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex *const | AP[], | ||
| int | lda, | ||
| hipblasDoubleComplex *const | BP[], | ||
| int | ldb, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
trsmBatched performs the following batched operation:
op(A_i)*X_i = alpha*B_i or X_i*op(A_i) = alpha*B_i, for i = 1, ..., batchCount.
where alpha is a scalar, X and B are batched m by n matrices, A is triangular batched matrix and op(A) is one of
op( A ) = A or op( A ) = A^T or op( A ) = A^H.
Each matrix X_i is overwritten on B_i for i = 1, ..., batchCount.
Note about memory allocation: When trsm is launched with a k evenly divisible by the internal block size of 128, and is no larger than 10 of these blocks, the API takes advantage of utilizing pre-allocated memory found in the handle to increase overall performance. This memory can be managed by using the environment variable WORKBUF_TRSM_B_CHNK. When this variable is not set the device memory used for temporary storage will default to 1 MB and may result in chunking, which in turn may reduce performance. Under these circumstances it is recommended that WORKBUF_TRSM_B_CHNK be set to the desired chunk of right hand sides to be used at a time. (where k is m when HIPBLAS_SIDE_LEFT and is n when HIPBLAS_SIDE_RIGHT)
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] HIPBLAS_SIDE_LEFT: op(A)*X = alpha*B. HIPBLAS_SIDE_RIGHT: X*op(A) = alpha*B. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: each A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: each A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: op(A) = A. HIPBLAS_OP_T: op(A) = A^T. HIPBLAS_OP_C: op(A) = A^H. |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: each A_i is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: each A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of each B_i. m >= 0. |
| [in] | n | [int] n specifies the number of columns of each B_i. n >= 0. |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. When alpha is &zero then A is not referenced and B need not be set before entry. |
| [in] | AP | device array of device pointers storing each matrix A_i on the GPU. Matricies are of dimension ( lda, k ), where k is m when HIPBLAS_SIDE_LEFT and is n when HIPBLAS_SIDE_RIGHT only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of each A_i. if side = HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), if side = HIPBLAS_SIDE_RIGHT, lda >= max( 1, n ). |
| [in,out] | BP | device array of device pointers storing each matrix B_i on the GPU. |
| [in] | ldb | [int] ldb specifies the first dimension of each B_i. ldb >= max( 1, m ). |
| [in] | batchCount | [int] number of trsm operatons in the batch. |
| hipblasStatus_t hipblasZtrsmStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasSideMode_t | side, | ||
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| int | n, | ||
| const hipblasDoubleComplex * | alpha, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| hipblasDoubleComplex * | BP, | ||
| int | ldb, | ||
| hipblasStride | strideB, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
trsmSridedBatched performs the following strided batched operation:
op(A_i)*X_i = alpha*B_i or X_i*op(A_i) = alpha*B_i, for i = 1, ..., batchCount.
where alpha is a scalar, X and B are strided batched m by n matrices, A is triangular strided batched matrix and op(A) is one of
op( A ) = A or op( A ) = A^T or op( A ) = A^H.
Each matrix X_i is overwritten on B_i for i = 1, ..., batchCount.
Note about memory allocation: When trsm is launched with a k evenly divisible by the internal block size of 128, and is no larger than 10 of these blocks, the API takes advantage of utilizing pre-allocated memory found in the handle to increase overall performance. This memory can be managed by using the environment variable WORKBUF_TRSM_B_CHNK. When this variable is not set the device memory used for temporary storage will default to 1 MB and may result in chunking, which in turn may reduce performance. Under these circumstances it is recommended that WORKBUF_TRSM_B_CHNK be set to the desired chunk of right hand sides to be used at a time. (where k is m when HIPBLAS_SIDE_LEFT and is n when HIPBLAS_SIDE_RIGHT)
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | side | [hipblasSideMode_t] HIPBLAS_SIDE_LEFT: op(A)*X = alpha*B. HIPBLAS_SIDE_RIGHT: X*op(A) = alpha*B. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: each A_i is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: each A_i is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] HIPBLAS_OP_N: op(A) = A. HIPBLAS_OP_T: op(A) = A^T. HIPBLAS_OP_C: op(A) = A^H. |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: each A_i is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: each A_i is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of each B_i. m >= 0. |
| [in] | n | [int] n specifies the number of columns of each B_i. n >= 0. |
| [in] | alpha | device pointer or host pointer specifying the scalar alpha. When alpha is &zero then A is not referenced and B need not be set before entry. |
| [in] | AP | device pointer pointing to the first matrix A_1. of dimension ( lda, k ), where k is m when HIPBLAS_SIDE_LEFT and is n when HIPBLAS_SIDE_RIGHT only the upper/lower triangular part is accessed. |
| [in] | lda | [int] lda specifies the first dimension of each A_i. if side = HIPBLAS_SIDE_LEFT, lda >= max( 1, m ), if side = HIPBLAS_SIDE_RIGHT, lda >= max( 1, n ). |
| [in] | strideA | [hipblasStride] stride from the start of one A_i matrix to the next A_(i + 1). |
| [in,out] | BP | device pointer pointing to the first matrix B_1. |
| [in] | ldb | [int] ldb specifies the first dimension of each B_i. ldb >= max( 1, m ). |
| [in] | strideB | [hipblasStride] stride from the start of one B_i matrix to the next B_(i + 1). |
| [in] | batchCount | [int] number of trsm operatons in the batch. |
| hipblasStatus_t hipblasZtrsv | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| hipblasDoubleComplex * | x, | ||
| int | incx | ||
| ) |
BLAS Level 2 API.
trsv solves
A*x = b or A**T*x = b,
where x and b are vectors and A is a triangular matrix.
The vector x is overwritten on b.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of b. m >= 0. |
| [in] | AP | device pointer storing matrix A, of dimension ( lda, m ) |
| [in] | lda | [int] specifies the leading dimension of A. lda = max( 1, m ). |
| [in] | x | device pointer storing vector x. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| hipblasStatus_t hipblasZtrsvBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const hipblasDoubleComplex *const | AP[], | ||
| int | lda, | ||
| hipblasDoubleComplex *const | x[], | ||
| int | incx, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
trsvBatched solves
A_i*x_i = b_i or A_i**T*x_i = b_i,
where (A_i, x_i, b_i) is the i-th instance of the batch. x_i and b_i are vectors and A_i is an m by m triangular matrix.
The vector x is overwritten on b.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of b. m >= 0. |
| [in] | AP | device array of device pointers storing each matrix A_i. |
| [in] | lda | [int] specifies the leading dimension of each A_i. lda = max(1, m) |
| [in] | x | device array of device pointers storing each vector x_i. |
| [in] | incx | [int] specifies the increment for the elements of x. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasZtrsvStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasOperation_t | transA, | ||
| hipblasDiagType_t | diag, | ||
| int | m, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| hipblasDoubleComplex * | x, | ||
| int | incx, | ||
| hipblasStride | stridex, | ||
| int | batchCount | ||
| ) |
BLAS Level 2 API.
trsvStridedBatched solves
A_i*x_i = b_i or A_i**T*x_i = b_i,
where (A_i, x_i, b_i) is the i-th instance of the batch. x_i and b_i are vectors and A_i is an m by m triangular matrix, for i = 1, ..., batchCount.
The vector x is overwritten on b.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] HIPBLAS_FILL_MODE_UPPER: A is an upper triangular matrix. HIPBLAS_FILL_MODE_LOWER: A is a lower triangular matrix. |
| [in] | transA | [hipblasOperation_t] |
| [in] | diag | [hipblasDiagType_t] HIPBLAS_DIAG_UNIT: A is assumed to be unit triangular. HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular. |
| [in] | m | [int] m specifies the number of rows of each b_i. m >= 0. |
| [in] | AP | device pointer to the first matrix (A_1) in the batch, of dimension ( lda, m ) |
| [in] | strideA | [hipblasStride] stride from the start of one A_i matrix to the next A_(i + 1) |
| [in] | lda | [int] specifies the leading dimension of each A_i. lda = max( 1, m ). |
| [in,out] | x | device pointer to the first vector (x_1) in the batch. |
| [in] | stridex | [hipblasStride] stride from the start of one x_i vector to the next x_(i + 1) |
| [in] | incx | [int] specifies the increment for the elements of each x_i. |
| [in] | batchCount | [int] number of instances in the batch |
| hipblasStatus_t hipblasZtrtri | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasDiagType_t | diag, | ||
| int | n, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| hipblasDoubleComplex * | invA, | ||
| int | ldinvA | ||
| ) |
BLAS Level 3 API.
trtri compute the inverse of a matrix A, namely, invA
and write the result into invA;
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' if HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced if HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced |
| [in] | diag | [hipblasDiagType_t] = 'HIPBLAS_DIAG_NON_UNIT', A is non-unit triangular; = 'HIPBLAS_DIAG_UNIT', A is unit triangular; |
| [in] | n | [int] size of matrix A and invA |
| [in] | AP | device pointer storing matrix A. |
| [in] | lda | [int] specifies the leading dimension of A. |
| [out] | invA | device pointer storing matrix invA. |
| [in] | ldinvA | [int] specifies the leading dimension of invA. |
| hipblasStatus_t hipblasZtrtriBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasDiagType_t | diag, | ||
| int | n, | ||
| const hipblasDoubleComplex *const | AP[], | ||
| int | lda, | ||
| hipblasDoubleComplex * | invA[], | ||
| int | ldinvA, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
trtriBatched compute the inverse of A_i and write into invA_i where A_i and invA_i are the i-th matrices in the batch, for i = 1, ..., batchCount.
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' |
| [in] | diag | [hipblasDiagType_t] = 'HIPBLAS_DIAG_NON_UNIT', A is non-unit triangular; = 'HIPBLAS_DIAG_UNIT', A is unit triangular; |
| [in] | n | [int] |
| [in] | AP | device array of device pointers storing each matrix A_i. |
| [in] | lda | [int] specifies the leading dimension of each A_i. |
| [out] | invA | device array of device pointers storing the inverse of each matrix A_i. Partial inplace operation is supported, see below. If UPLO = 'U', the leading N-by-N upper triangular part of the invA will store the inverse of the upper triangular matrix, and the strictly lower triangular part of invA is cleared. If UPLO = 'L', the leading N-by-N lower triangular part of the invA will store the inverse of the lower triangular matrix, and the strictly upper triangular part of invA is cleared. |
| [in] | ldinvA | [int] specifies the leading dimension of each invA_i. |
| [in] | batchCount | [int] numbers of matrices in the batch |
| hipblasStatus_t hipblasZtrtriStridedBatched | ( | hipblasHandle_t | handle, |
| hipblasFillMode_t | uplo, | ||
| hipblasDiagType_t | diag, | ||
| int | n, | ||
| const hipblasDoubleComplex * | AP, | ||
| int | lda, | ||
| hipblasStride | strideA, | ||
| hipblasDoubleComplex * | invA, | ||
| int | ldinvA, | ||
| hipblasStride | stride_invA, | ||
| int | batchCount | ||
| ) |
BLAS Level 3 API.
trtriStridedBatched compute the inverse of A_i and write into invA_i where A_i and invA_i are the i-th matrices in the batch, for i = 1, ..., batchCount
| [in] | handle | [hipblasHandle_t] handle to the hipblas library context queue. |
| [in] | uplo | [hipblasFillMode_t] specifies whether the upper 'HIPBLAS_FILL_MODE_UPPER' or lower 'HIPBLAS_FILL_MODE_LOWER' |
| [in] | diag | [hipblasDiagType_t] = 'HIPBLAS_DIAG_NON_UNIT', A is non-unit triangular; = 'HIPBLAS_DIAG_UNIT', A is unit triangular; |
| [in] | n | [int] |
| [in] | AP | device pointer pointing to address of first matrix A_1. |
| [in] | lda | [int] specifies the leading dimension of each A. |
| [in] | strideA | [hipblasStride] "batch stride a": stride from the start of one A_i matrix to the next A_(i + 1). |
| [out] | invA | device pointer storing the inverses of each matrix A_i. Partial inplace operation is supported, see below. If UPLO = 'U', the leading N-by-N upper triangular part of the invA will store the inverse of the upper triangular matrix, and the strictly lower triangular part of invA is cleared. If UPLO = 'L', the leading N-by-N lower triangular part of the invA will store the inverse of the lower triangular matrix, and the strictly upper triangular part of invA is cleared. |
| [in] | ldinvA | [int] specifies the leading dimension of each invA_i. |
| [in] | stride_invA | [hipblasStride] "batch stride invA": stride from the start of one invA_i matrix to the next invA_(i + 1). |
| [in] | batchCount | [int] numbers of matrices in the batch |
1.8.17